diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index f379c103b6940d..6eff6bd7b7c203 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -51,6 +51,7 @@ #include "llvm/Transforms/Coroutines.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/Transforms/IPO/LowerTypeTests.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" #include "llvm/Transforms/InstCombine/InstCombine.h" @@ -553,6 +554,16 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, std::unique_ptr TLII( createTLII(TargetTriple, CodeGenOpts)); + // If we reached here with a non-empty index file name, then the index file + // was empty and we are not performing ThinLTO backend compilation (used in + // testing in a distributed build environment). Drop any the type test + // assume sequences inserted for whole program vtables so that codegen doesn't + // complain. + if (!CodeGenOpts.ThinLTOIndexFile.empty()) + MPM.add(createLowerTypeTestsPass(/*ExportSummary=*/nullptr, + /*ImportSummary=*/nullptr, + /*DropTypeTests=*/true)); + PassManagerBuilderWrapper PMBuilder(TargetTriple, CodeGenOpts, LangOpts); // At O0 and O1 we only run the always inliner which is more efficient. At @@ -1114,6 +1125,15 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( bool IsLTO = CodeGenOpts.PrepareForLTO; if (CodeGenOpts.OptimizationLevel == 0) { + // If we reached here with a non-empty index file name, then the index + // file was empty and we are not performing ThinLTO backend compilation + // (used in testing in a distributed build environment). Drop any the type + // test assume sequences inserted for whole program vtables so that + // codegen doesn't complain. + if (!CodeGenOpts.ThinLTOIndexFile.empty()) + MPM.addPass(LowerTypeTestsPass(/*ExportSummary=*/nullptr, + /*ImportSummary=*/nullptr, + /*DropTypeTests=*/true)); if (Optional Options = getGCOVOptions(CodeGenOpts)) MPM.addPass(GCOVProfilerPass(*Options)); if (Optional Options = @@ -1150,6 +1170,18 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( // configure the pipeline. PassBuilder::OptimizationLevel Level = mapToLevel(CodeGenOpts); + // If we reached here with a non-empty index file name, then the index + // file was empty and we are not performing ThinLTO backend compilation + // (used in testing in a distributed build environment). Drop any the type + // test assume sequences inserted for whole program vtables so that + // codegen doesn't complain. + if (!CodeGenOpts.ThinLTOIndexFile.empty()) + PB.registerPipelineStartEPCallback([](ModulePassManager &MPM) { + MPM.addPass(LowerTypeTestsPass(/*ExportSummary=*/nullptr, + /*ImportSummary=*/nullptr, + /*DropTypeTests=*/true)); + }); + PB.registerPipelineStartEPCallback([](ModulePassManager &MPM) { MPM.addPass(createModuleToFunctionPassAdaptor( EntryExitInstrumenterPass(/*PostInlining=*/false))); diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index 3f3825b7627597..7389207bc8ad19 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -2641,7 +2641,9 @@ void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD, if (SanOpts.has(SanitizerKind::CFIVCall)) EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc); else if (CGM.getCodeGenOpts().WholeProgramVTables && - CGM.HasHiddenLTOVisibility(RD)) { + // Don't insert type test assumes if we are forcing public std + // visibility. + !CGM.HasLTOVisibilityPublicStd(RD)) { llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); llvm::Value *TypeId = diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp index cbc969a1ac373c..403b9e25f7a398 100644 --- a/clang/lib/CodeGen/CGVTables.cpp +++ b/clang/lib/CodeGen/CGVTables.cpp @@ -1011,6 +1011,26 @@ void CodeGenModule::EmitDeferredVTables() { DeferredVTables.clear(); } +bool CodeGenModule::HasLTOVisibilityPublicStd(const CXXRecordDecl *RD) { + if (!getCodeGenOpts().LTOVisibilityPublicStd) + return false; + + const DeclContext *DC = RD; + while (1) { + auto *D = cast(DC); + DC = DC->getParent(); + if (isa(DC->getRedeclContext())) { + if (auto *ND = dyn_cast(D)) + if (const IdentifierInfo *II = ND->getIdentifier()) + if (II->isStr("std") || II->isStr("stdext")) + return true; + break; + } + } + + return false; +} + bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) { LinkageInfo LV = RD->getLinkageAndVisibility(); if (!isExternallyVisible(LV.getLinkage())) @@ -1027,22 +1047,7 @@ bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) { return false; } - if (getCodeGenOpts().LTOVisibilityPublicStd) { - const DeclContext *DC = RD; - while (1) { - auto *D = cast(DC); - DC = DC->getParent(); - if (isa(DC->getRedeclContext())) { - if (auto *ND = dyn_cast(D)) - if (const IdentifierInfo *II = ND->getIdentifier()) - if (II->isStr("std") || II->isStr("stdext")) - return false; - break; - } - } - } - - return true; + return !HasLTOVisibilityPublicStd(RD); } llvm::GlobalObject::VCallVisibility diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 115e754bb39200..a711d5ccba0efb 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1292,6 +1292,11 @@ class CodeGenModule : public CodeGenTypeCache { /// optimization. bool HasHiddenLTOVisibility(const CXXRecordDecl *RD); + /// Returns whether the given record has public std LTO visibility + /// and therefore may not participate in (single-module) CFI and whole-program + /// vtable optimization. + bool HasLTOVisibilityPublicStd(const CXXRecordDecl *RD); + /// Returns the vcall visibility of the given type. This is the scope in which /// a virtual function call could be made which ends up being dispatched to a /// member function of this class. This scope can be wider than the visibility diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index b5b8702c551e5f..057c726e355e31 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -670,6 +670,10 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( CGM.HasHiddenLTOVisibility(RD); bool ShouldEmitVFEInfo = CGM.getCodeGenOpts().VirtualFunctionElimination && CGM.HasHiddenLTOVisibility(RD); + bool ShouldEmitWPDInfo = + CGM.getCodeGenOpts().WholeProgramVTables && + // Don't insert type tests if we are forcing public std visibility. + !CGM.HasLTOVisibilityPublicStd(RD); llvm::Value *VirtualFn = nullptr; { @@ -677,8 +681,9 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( llvm::Value *TypeId = nullptr; llvm::Value *CheckResult = nullptr; - if (ShouldEmitCFICheck || ShouldEmitVFEInfo) { - // If doing CFI or VFE, we will need the metadata node to check against. + if (ShouldEmitCFICheck || ShouldEmitVFEInfo || ShouldEmitWPDInfo) { + // If doing CFI, VFE or WPD, we will need the metadata node to check + // against. llvm::Metadata *MD = CGM.CreateMetadataIdentifierForVirtualMemPtrType(QualType(MPT, 0)); TypeId = llvm::MetadataAsValue::get(CGF.getLLVMContext(), MD); @@ -702,7 +707,7 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( } else { // When not doing VFE, emit a normal load, as it allows more // optimisations than type.checked.load. - if (ShouldEmitCFICheck) { + if (ShouldEmitCFICheck || ShouldEmitWPDInfo) { CheckResult = Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::type_test), {Builder.CreateBitCast(VFPAddr, CGF.Int8PtrTy), TypeId}); @@ -713,7 +718,8 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( "memptr.virtualfn"); } assert(VirtualFn && "Virtual fuction pointer not created!"); - assert((!ShouldEmitCFICheck || !ShouldEmitVFEInfo || CheckResult) && + assert((!ShouldEmitCFICheck || !ShouldEmitVFEInfo || !ShouldEmitWPDInfo || + CheckResult) && "Check result required but not created!"); if (ShouldEmitCFICheck) { diff --git a/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll b/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll index 2920cf19c81a94..b46845afba26b9 100644 --- a/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll +++ b/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll @@ -8,6 +8,7 @@ ; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436. ; RUN: llvm-lto2 run -thinlto-distributed-indexes %t.o \ +; RUN: -whole-program-visibility \ ; RUN: -verify-machineinstrs=0 \ ; RUN: -o %t2.index \ ; RUN: -r=%t.o,test,px \ diff --git a/clang/test/CodeGenCXX/cfi-mfcall.cpp b/clang/test/CodeGenCXX/cfi-mfcall.cpp index c16b20b8dcec8d..47f3a2616f6050 100644 --- a/clang/test/CodeGenCXX/cfi-mfcall.cpp +++ b/clang/test/CodeGenCXX/cfi-mfcall.cpp @@ -1,5 +1,8 @@ // RUN: %clang_cc1 -triple x86_64-unknown-linux -fsanitize=cfi-mfcall -fsanitize-trap=cfi-mfcall -fvisibility hidden -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple x86_64-unknown-linux -fsanitize=cfi-mfcall -fsanitize-trap=cfi-mfcall -fvisibility default -emit-llvm -o - %s | FileCheck --check-prefix=DEFAULT %s +// With -fwhole-program-vtables we should get the member function pointer type +// test, even without hidden visibility. +// RUN: %clang_cc1 -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck %s --check-prefix=WPV struct B1 {}; struct B2 {}; @@ -9,6 +12,9 @@ struct S : B1, B3 {}; // DEFAULT-NOT: llvm.type.test void f(S *s, void (S::*p)()) { + // WPV: [[OFFSET:%.*]] = sub i64 {{.*}}, 1 + // WPV: [[VFPTR:%.*]] = getelementptr i8, i8* %{{.*}}, i64 [[OFFSET]] + // WPV: [[TT:%.*]] = call i1 @llvm.type.test(i8* [[VFPTR]], metadata !"_ZTSM1SFvvE.virtual") // CHECK: [[OFFSET:%.*]] = sub i64 {{.*}}, 1 // CHECK: [[VFPTR:%.*]] = getelementptr i8, i8* %{{.*}}, i64 [[OFFSET]] // CHECK: [[TT:%.*]] = call i1 @llvm.type.test(i8* [[VFPTR]], metadata !"_ZTSM1SFvvE.virtual") diff --git a/clang/test/CodeGenCXX/lto-visibility-inference.cpp b/clang/test/CodeGenCXX/lto-visibility-inference.cpp index 8e57ef5e0b892e..632b6e643f3018 100644 --- a/clang/test/CodeGenCXX/lto-visibility-inference.cpp +++ b/clang/test/CodeGenCXX/lto-visibility-inference.cpp @@ -70,20 +70,20 @@ void f(C1 *c1, C2 *c2, C3 *c3, C4 *c4, C5 *c5, C6 *c6, std::C7 *c7, // ITANIUM: type.test{{.*}}!"_ZTS2C1" // MS: type.test{{.*}}!"?AUC1@@" c1->f(); - // ITANIUM-NOT: type.test{{.*}}!"_ZTS2C2" + // ITANIUM: type.test{{.*}}!"_ZTS2C2" // MS: type.test{{.*}}!"?AUC2@@" c2->f(); // ITANIUM: type.test{{.*}}!"_ZTS2C3" - // MS-NOT: type.test{{.*}}!"?AUC3@@" + // MS: type.test{{.*}}!"?AUC3@@" c3->f(); // ITANIUM: type.test{{.*}}!"_ZTS2C4" - // MS-NOT: type.test{{.*}}!"?AUC4@@" + // MS: type.test{{.*}}!"?AUC4@@" c4->f(); - // ITANIUM-NOT: type.test{{.*}}!"_ZTS2C5" - // MS-NOT: type.test{{.*}}!"?AUC5@@" + // ITANIUM: type.test{{.*}}!"_ZTS2C5" + // MS: type.test{{.*}}!"?AUC5@@" c5->f(); - // ITANIUM-NOT: type.test{{.*}}!"_ZTS2C6" - // MS-NOT: type.test{{.*}}!"?AUC6@@" + // ITANIUM: type.test{{.*}}!"_ZTS2C6" + // MS: type.test{{.*}}!"?AUC6@@" c6->f(); // ITANIUM: type.test{{.*}}!"_ZTSSt2C7" // MS-STD: type.test{{.*}}!"?AUC7@std@@" diff --git a/clang/test/CodeGenCXX/thinlto-distributed-type-metadata.cpp b/clang/test/CodeGenCXX/thinlto-distributed-type-metadata.cpp new file mode 100644 index 00000000000000..a6c0da2e4c3caa --- /dev/null +++ b/clang/test/CodeGenCXX/thinlto-distributed-type-metadata.cpp @@ -0,0 +1,69 @@ +// Test distributed ThinLTO backend handling of type tests + +// REQUIRES: x86-registered-target + +// Ensure that a distributed backend invocation of ThinLTO lowers the type test +// as expected. +// RUN: %clang_cc1 -flto=thin -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm-bc -o %t.o %s +// RUN: llvm-dis %t.o -o - | FileCheck --check-prefix=TT %s +// RUN: llvm-lto -thinlto -o %t2 %t.o +// RUN: %clang -target x86_64-unknown-linux -O2 -o %t3.o -x ir %t.o -c -fthinlto-index=%t2.thinlto.bc -save-temps=obj +// RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s +// llvm-nm %t3.o | FileCheck --check-prefix=NM %s + +// The pre-link bitcode produced by clang should contain a type test assume +// sequence. +// TT: [[TTREG:%[0-9]+]] = call i1 @llvm.type.test({{.*}}, metadata !"_ZTS1A") +// TT: void @llvm.assume(i1 [[TTREG]]) + +// The ThinLTO backend optimized bitcode should not have any type test assume +// sequences. +// OPT-NOT: @llvm.type.test +// OPT-NOT: call void @llvm.assume +// We should have only one @llvm.assume call, the one that was expanded +// from the builtin in the IR below, not the one fed by the type test. +// OPT: %cmp = icmp ne %struct.A* %{{.*}}, null +// OPT: void @llvm.assume(i1 %cmp) +// Check after the builtin assume again that we don't have a type test assume +// sequence. +// OPT-NOT: @llvm.type.test +// OPT-NOT: call void @llvm.assume + +// NM: T _Z2afP1A + +// Also check type test are lowered when the distributed ThinLTO backend clang +// invocation is passed an empty index file, in which case a non-ThinLTO +// compilation pipeline is invoked. If not lowered then LLVM CodeGen may assert. +// RUN: touch %t4.thinlto.bc +// O2 old PM +// RUN: %clang -target x86_64-unknown-linux -O2 -o %t4.o -x ir %t.o -c -fthinlto-index=%t4.thinlto.bc -save-temps=obj +// RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s +// llvm-nm %t4.o | FileCheck --check-prefix=NM %s +// O2 new PM +// RUN: %clang -target x86_64-unknown-linux -O2 -o %t4.o -x ir %t.o -c -fthinlto-index=%t4.thinlto.bc -fexperimental-new-pass-manager -save-temps=obj +// RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s +// llvm-nm %t4.o | FileCheck --check-prefix=NM %s +// O0 new PM +// RUN: %clang -target x86_64-unknown-linux -O0 -o %t4.o -x ir %t.o -c -fthinlto-index=%t4.thinlto.bc -fexperimental-new-pass-manager -save-temps=obj +// RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s +// llvm-nm %t4.o | FileCheck --check-prefix=NM %s + +struct A { + A(); + virtual void f(); +}; + +struct B : virtual A { + B(); +}; + +A::A() {} +B::B() {} + +void A::f() { +} + +void af(A *a) { + __builtin_assume(a != 0); + a->f(); +} diff --git a/clang/test/CodeGenCXX/type-metadata.cpp b/clang/test/CodeGenCXX/type-metadata.cpp index a7a34673cdfcb5..05731f15b9df5d 100644 --- a/clang/test/CodeGenCXX/type-metadata.cpp +++ b/clang/test/CodeGenCXX/type-metadata.cpp @@ -6,6 +6,7 @@ // Tests for the whole-program-vtables feature: // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility hidden -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=ITANIUM --check-prefix=TT-ITANIUM %s +// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=ITANIUM-DEFAULTVIS --check-prefix=TT-ITANIUM %s // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=MS --check-prefix=TT-MS %s // Tests for cfi + whole-program-vtables: @@ -129,6 +130,7 @@ void D::h() { } // ITANIUM: define hidden void @_Z2afP1A +// ITANIUM-DEFAULTVIS: define void @_Z2afP1A // MS: define dso_local void @"?af@@YAXPEAUA@@@Z" void af(A *a) { // TT-ITANIUM: [[P:%[^ ]*]] = call i1 @llvm.type.test(i8* [[VT:%[^ ]*]], metadata !"_ZTS1A") @@ -239,6 +241,7 @@ struct D : C { }; // ITANIUM: define hidden void @_ZN5test21fEPNS_1DE +// ITANIUM-DEFAULTVIS: define void @_ZN5test21fEPNS_1DE // MS: define dso_local void @"?f@test2@@YAXPEAUD@1@@Z" void f(D *d) { // TT-ITANIUM: {{%[^ ]*}} = call i1 @llvm.type.test(i8* {{%[^ ]*}}, metadata !"_ZTSN5test21DE") diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index ef1edbcd199240..6655ab7a774a52 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -165,6 +165,7 @@ struct Configuration { bool ltoCSProfileGenerate; bool ltoDebugPassManager; bool ltoNewPassManager; + bool ltoWholeProgramVisibility; bool mergeArmExidx; bool mipsN32Abi = false; bool mmapOutputFile; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index e85183e14e24f3..eb2fc4f194594c 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -899,6 +899,8 @@ static void readConfigs(opt::InputArgList &args) { config->ltoDebugPassManager = args.hasArg(OPT_lto_debug_pass_manager); config->ltoNewPassManager = args.hasArg(OPT_lto_new_pass_manager); config->ltoNewPmPasses = args.getLastArgValue(OPT_lto_newpm_passes); + config->ltoWholeProgramVisibility = + args.hasArg(OPT_lto_whole_program_visibility); config->ltoo = args::getInteger(args, OPT_lto_O, 2); config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq); config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1); diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index 2148ac5002916c..d8e343c48871f0 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -111,6 +111,8 @@ static lto::Config createConfig() { c.DebugPassManager = config->ltoDebugPassManager; c.DwoDir = config->dwoDir; + c.HasWholeProgramVisibility = config->ltoWholeProgramVisibility; + c.CSIRProfile = config->ltoCSProfileFile; c.RunCSIRInstr = config->ltoCSProfileGenerate; diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index ea78a35262110e..f45ad5a0af28cd 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -479,6 +479,8 @@ def lto_cs_profile_file: J<"lto-cs-profile-file=">, def lto_obj_path_eq: J<"lto-obj-path=">; def lto_sample_profile: J<"lto-sample-profile=">, HelpText<"Sample profile file path">; +def lto_whole_program_visibility: F<"lto-whole-program-visibility">, + HelpText<"Asserts that the LTO link has whole program visibility">; def disable_verify: F<"disable-verify">; defm mllvm: Eq<"mllvm", "Additional arguments to forward to LLVM's option processing">; def opt_remarks_filename: Separate<["--"], "opt-remarks-filename">, diff --git a/lld/test/ELF/lto/devirt_vcall_vis_public.ll b/lld/test/ELF/lto/devirt_vcall_vis_public.ll new file mode 100644 index 00000000000000..f07297d5863caf --- /dev/null +++ b/lld/test/ELF/lto/devirt_vcall_vis_public.ll @@ -0,0 +1,128 @@ +; REQUIRES: x86 +; Test that -lto-whole-program-visibility enables devirtualization. + +; Index based WPD +; Generate unsplit module with summary for ThinLTO index-based WPD. +; RUN: opt -thinlto-bc -o %t2.o %s +; RUN: ld.lld %t2.o -o %t3 -save-temps -lto-whole-program-visibility \ +; RUN: -mllvm -pass-remarks=. --export-dynamic 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t2.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; Hybrid WPD +; Generate split module with summary for hybrid Thin/Regular LTO WPD. +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s +; RUN: ld.lld %t.o -o %t3 -save-temps -lto-whole-program-visibility \ +; RUN: -mllvm -pass-remarks=. --export-dynamic 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; Regular LTO WPD +; RUN: opt -o %t4.o %s +; RUN: ld.lld %t4.o -o %t3 -save-temps -lto-whole-program-visibility \ +; RUN: -mllvm -pass-remarks=. --export-dynamic 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t3.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; REMARK-DAG: single-impl: devirtualized a call to _ZN1A1nEi +; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi + +; Try everything again but without -whole-program-visibility to confirm +; WPD fails + +; Index based WPD +; RUN: ld.lld %t2.o -o %t3 -save-temps \ +; RUN: -mllvm -pass-remarks=. --export-dynamic 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty +; RUN: llvm-dis %t2.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR + +; Hybrid WPD +; RUN: ld.lld %t.o -o %t3 -save-temps \ +; RUN: -mllvm -pass-remarks=. --export-dynamic 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty +; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR + +; Regular LTO WPD +; RUN: ld.lld %t4.o -o %t3 -save-temps \ +; RUN: -mllvm -pass-remarks=. --export-dynamic 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty +; RUN: llvm-dis %t3.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } +%struct.C = type { %struct.A } +%struct.D = type { i32 (...)** } + +@_ZTV1B = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1, !vcall_visibility !5 +@_ZTV1C = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.C*, i32)* @_ZN1C1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !2, !vcall_visibility !5 +@_ZTV1D = constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3, !vcall_visibility !5 + + +; CHECK-IR-LABEL: define dso_local i32 @_start +define i32 @_start(%struct.A* %obj, %struct.D* %obj2, i32 %a) { +entry: + %0 = bitcast %struct.A* %obj to i8*** + %vtable = load i8**, i8*** %0 + %1 = bitcast i8** %vtable to i8* + %p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A") + call void @llvm.assume(i1 %p) + %fptrptr = getelementptr i8*, i8** %vtable, i32 1 + %2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)** + %fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8 + + ; Check that the call was devirtualized. + ; CHECK-IR: %call = tail call i32 @_ZN1A1nEi + ; CHECK-NODEVIRT-IR: %call = tail call i32 %fptr1 + %call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a) + + %3 = bitcast i8** %vtable to i32 (%struct.A*, i32)** + %fptr22 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %3, align 8 + + ; We still have to call it as virtual. + ; CHECK-IR: %call3 = tail call i32 %fptr22 + ; CHECK-NODEVIRT-IR: %call3 = tail call i32 %fptr22 + %call3 = tail call i32 %fptr22(%struct.A* nonnull %obj, i32 %call) + + %4 = bitcast %struct.D* %obj2 to i8*** + %vtable2 = load i8**, i8*** %4 + %5 = bitcast i8** %vtable2 to i8* + %p2 = call i1 @llvm.type.test(i8* %5, metadata !4) + call void @llvm.assume(i1 %p2) + + %6 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)** + %fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %6, align 8 + + ; Check that the call was devirtualized. + ; CHECK-IR: %call4 = tail call i32 @_ZN1D1mEi + ; CHECK-NODEVIRT-IR: %call4 = tail call i32 %fptr33 + %call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 %call3) + ret i32 %call4 +} +; CHECK-IR-LABEL: ret i32 +; CHECK-IR-LABEL: } + +declare i1 @llvm.type.test(i8*, metadata) +declare void @llvm.assume(i1) + +define i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1C1fEi(%struct.C* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 { + ret i32 0; +} + +; Make sure we don't inline or otherwise optimize out the direct calls. +attributes #0 = { noinline optnone } + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTS1B"} +!2 = !{i64 16, !"_ZTS1C"} +!3 = !{i64 16, !4} +!4 = distinct !{} +!5 = !{i64 0} diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h index 50147300f7f77d..2ca7ad4fda04ab 100644 --- a/llvm/include/llvm/LTO/Config.h +++ b/llvm/include/llvm/LTO/Config.h @@ -61,6 +61,10 @@ struct Config { /// Run PGO context sensitive IR instrumentation. bool RunCSIRInstr = false; + /// Asserts whether we can assume whole program visibility during the LTO + /// link. + bool HasWholeProgramVisibility = false; + /// If this field is set, the set of passes run in the middle-end optimizer /// will be the one specified by the string. Only works with the new pass /// manager as the old one doesn't have this ability. diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h index de0c80f5b19acc..daf44822aeb4d7 100644 --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -236,12 +236,15 @@ enum class PassSummaryAction { /// The behavior depends on the summary arguments: /// - If ExportSummary is non-null, this pass will export type identifiers to /// the given summary. -/// - Otherwise, if ImportSummary is non-null, this pass will import type -/// identifiers from the given summary. -/// - Otherwise it does neither. -/// It is invalid for both ExportSummary and ImportSummary to be non-null. +/// - If ImportSummary is non-null, this pass will import type identifiers from +/// the given summary. +/// - Otherwise, if both are null and DropTypeTests is true, all type test +/// assume sequences will be removed from the IR. +/// It is invalid for both ExportSummary and ImportSummary to be non-null +/// unless DropTypeTests is true. ModulePass *createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary); + const ModuleSummaryIndex *ImportSummary, + bool DropTypeTests = false); /// This pass export CFI checks for use by external modules. ModulePass *createCrossDSOCFIPass(); diff --git a/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h b/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h index 3c2bb65b9552b9..5e91ae599363b8 100644 --- a/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h +++ b/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h @@ -201,9 +201,12 @@ class LowerTypeTestsPass : public PassInfoMixin { public: ModuleSummaryIndex *ExportSummary; const ModuleSummaryIndex *ImportSummary; + bool DropTypeTests; LowerTypeTestsPass(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) - : ExportSummary(ExportSummary), ImportSummary(ImportSummary) {} + const ModuleSummaryIndex *ImportSummary, + bool DropTypeTests = false) + : ExportSummary(ExportSummary), ImportSummary(ImportSummary), + DropTypeTests(DropTypeTests) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; diff --git a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h index 8af2af7f352f4d..86e28cfead80ec 100644 --- a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h +++ b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h @@ -236,6 +236,11 @@ struct VTableSlotSummary { uint64_t ByteOffset; }; +void updateVCallVisibilityInModule(Module &M, + bool WholeProgramVisibilityEnabledInLTO); +void updateVCallVisibilityInIndex(ModuleSummaryIndex &Index, + bool WholeProgramVisibilityEnabledInLTO); + /// Perform index-based whole program devirtualization on the \p Summary /// index. Any devirtualized targets used by a type test in another module /// are added to the \p ExportedGUIDs set. For any local devirtualized targets diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 297b11de17a90b..e8f0fd6866dc89 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -982,6 +982,11 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { } } + // If allowed, upgrade public vcall visibility metadata to linkage unit + // visibility before whole program devirtualization in the optimizer. + updateVCallVisibilityInModule(*RegularLTO.CombinedModule, + Conf.HasWholeProgramVisibility); + if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(0, *RegularLTO.CombinedModule)) return Error::success(); @@ -1299,6 +1304,11 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, std::set ExportedGUIDs; + // If allowed, upgrade public vcall visibility to linkage unit visibility in + // the summaries before whole program devirtualization below. + updateVCallVisibilityInIndex(ThinLTO.CombinedIndex, + Conf.HasWholeProgramVisibility); + // Perform index-based WPD. This will return immediately if there are // no index entries in the typeIdMetadata map (e.g. if we are instead // performing IR-based WPD in hybrid regular/thin LTO mode). diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp index 5fef14230a9bb2..b3bc727e50adcb 100644 --- a/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -57,6 +57,7 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include @@ -542,6 +543,13 @@ bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline, } StatsFile = std::move(StatsFileOrErr.get()); + // Currently there is no support for enabling whole program visibility via a + // linker option in the old LTO API, but this call allows it to be specified + // via the internal option. Must be done before WPD invoked via the optimizer + // pipeline run below. + updateVCallVisibilityInModule(*MergedModule, + /* WholeProgramVisibilityEnabledInLTO */ false); + // We always run the verifier once on the merged module, the `DisableVerify` // parameter only applies to subsequent verify. verifyMergedModuleOnce(); diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp index 0bb518bf6cceb5..f4099e68315ee4 100644 --- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -969,6 +969,12 @@ void ThinLTOCodeGenerator::run() { // Synthesize entry counts for functions in the combined index. computeSyntheticCounts(*Index); + // Currently there is no support for enabling whole program visibility via a + // linker option in the old LTO API, but this call allows it to be specified + // via the internal option. Must be done before WPD below. + updateVCallVisibilityInIndex(*Index, + /* WholeProgramVisibilityEnabledInLTO */ false); + // Perform index-based WPD. This will return immediately if there are // no index entries in the typeIdMetadata map (e.g. if we are instead // performing IR-based WPD in hybrid regular/thin LTO mode). diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index e6747a68e67265..6eba35aaa4ed70 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -382,6 +382,9 @@ class LowerTypeTestsModule { ModuleSummaryIndex *ExportSummary; const ModuleSummaryIndex *ImportSummary; + // Set when the client has invoked this to simply drop all type test assume + // sequences. + bool DropTypeTests; Triple::ArchType Arch; Triple::OSType OS; @@ -500,7 +503,8 @@ class LowerTypeTestsModule { public: LowerTypeTestsModule(Module &M, ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary); + const ModuleSummaryIndex *ImportSummary, + bool DropTypeTests); bool lower(); @@ -516,22 +520,24 @@ struct LowerTypeTests : public ModulePass { ModuleSummaryIndex *ExportSummary; const ModuleSummaryIndex *ImportSummary; + bool DropTypeTests; LowerTypeTests() : ModulePass(ID), UseCommandLine(true) { initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry()); } LowerTypeTests(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) + const ModuleSummaryIndex *ImportSummary, bool DropTypeTests) : ModulePass(ID), ExportSummary(ExportSummary), - ImportSummary(ImportSummary) { + ImportSummary(ImportSummary), DropTypeTests(DropTypeTests) { initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry()); } bool runOnModule(Module &M) override { if (UseCommandLine) return LowerTypeTestsModule::runForTesting(M); - return LowerTypeTestsModule(M, ExportSummary, ImportSummary).lower(); + return LowerTypeTestsModule(M, ExportSummary, ImportSummary, DropTypeTests) + .lower(); } }; @@ -544,8 +550,9 @@ INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false, ModulePass * llvm::createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) { - return new LowerTypeTests(ExportSummary, ImportSummary); + const ModuleSummaryIndex *ImportSummary, + bool DropTypeTests) { + return new LowerTypeTests(ExportSummary, ImportSummary, DropTypeTests); } /// Build a bit set for TypeId using the object layouts in @@ -1655,8 +1662,9 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet( /// Lower all type tests in this module. LowerTypeTestsModule::LowerTypeTestsModule( Module &M, ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) - : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary) { + const ModuleSummaryIndex *ImportSummary, bool DropTypeTests) + : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary), + DropTypeTests(DropTypeTests) { assert(!(ExportSummary && ImportSummary)); Triple TargetTriple(M.getTargetTriple()); Arch = TargetTriple.getArch(); @@ -1683,7 +1691,8 @@ bool LowerTypeTestsModule::runForTesting(Module &M) { bool Changed = LowerTypeTestsModule( M, ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr, - ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr) + ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr, + /*DropTypeTests*/ false) .lower(); if (!ClWriteSummary.empty()) { @@ -1750,6 +1759,33 @@ void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) { } bool LowerTypeTestsModule::lower() { + Function *TypeTestFunc = + M.getFunction(Intrinsic::getName(Intrinsic::type_test)); + + if (DropTypeTests && TypeTestFunc) { + for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end(); + UI != UE;) { + auto *CI = cast((*UI++).getUser()); + // Find and erase llvm.assume intrinsics for this llvm.type.test call. + for (auto CIU = CI->use_begin(), CIUE = CI->use_end(); CIU != CIUE;) { + if (auto *AssumeCI = dyn_cast((*CIU++).getUser())) { + Function *F = AssumeCI->getCalledFunction(); + if (F && F->getIntrinsicID() == Intrinsic::assume) + AssumeCI->eraseFromParent(); + } + } + CI->eraseFromParent(); + } + + // We have deleted the type intrinsics, so we no longer have enough + // information to reason about the liveness of virtual function pointers + // in GlobalDCE. + for (GlobalVariable &GV : M.globals()) + GV.eraseMetadata(LLVMContext::MD_vcall_visibility); + + return true; + } + // If only some of the modules were split, we cannot correctly perform // this transformation. We already checked for the presense of type tests // with partially split modules during the thin link, and would have emitted @@ -1758,8 +1794,6 @@ bool LowerTypeTestsModule::lower() { (ImportSummary && ImportSummary->partiallySplitLTOUnits())) return false; - Function *TypeTestFunc = - M.getFunction(Intrinsic::getName(Intrinsic::type_test)); Function *ICallBranchFunnelFunc = M.getFunction(Intrinsic::getName(Intrinsic::icall_branch_funnel)); if ((!TypeTestFunc || TypeTestFunc->use_empty()) && @@ -2196,7 +2230,9 @@ bool LowerTypeTestsModule::lower() { PreservedAnalyses LowerTypeTestsPass::run(Module &M, ModuleAnalysisManager &AM) { - bool Changed = LowerTypeTestsModule(M, ExportSummary, ImportSummary).lower(); + bool Changed = + LowerTypeTestsModule(M, ExportSummary, ImportSummary, DropTypeTests) + .lower(); if (!Changed) return PreservedAnalyses::all(); return PreservedAnalyses::none(); diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 58cd99abcb5f00..5c5ddf4a38c839 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -140,6 +140,22 @@ static cl::opt cl::init(false), cl::ZeroOrMore, cl::desc("Print index-based devirtualization messages")); +/// Provide a way to force enable whole program visibility in tests. +/// This is needed to support legacy tests that don't contain +/// !vcall_visibility metadata (the mere presense of type tests +/// previously implied hidden visibility). +cl::opt + WholeProgramVisibility("whole-program-visibility", cl::init(false), + cl::Hidden, cl::ZeroOrMore, + cl::desc("Enable whole program visibility")); + +/// Provide a way to force disable whole program for debugging or workarounds, +/// when enabled via the linker. +cl::opt DisableWholeProgramVisibility( + "disable-whole-program-visibility", cl::init(false), cl::Hidden, + cl::ZeroOrMore, + cl::desc("Disable whole program visibility (overrides enabling options)")); + // Find the minimum offset that we may store a value of size Size bits at. If // IsAfter is set, look for an offset before the object, otherwise look for an // offset after the object. @@ -708,7 +724,49 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M, return PreservedAnalyses::none(); } +// Enable whole program visibility if enabled by client (e.g. linker) or +// internal option, and not force disabled. +static bool hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) { + return (WholeProgramVisibilityEnabledInLTO || WholeProgramVisibility) && + !DisableWholeProgramVisibility; +} + namespace llvm { + +/// If whole program visibility asserted, then upgrade all public vcall +/// visibility metadata on vtable definitions to linkage unit visibility in +/// Module IR (for regular or hybrid LTO). +void updateVCallVisibilityInModule(Module &M, + bool WholeProgramVisibilityEnabledInLTO) { + if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) + return; + for (GlobalVariable &GV : M.globals()) + // Add linkage unit visibility to any variable with type metadata, which are + // the vtable definitions. We won't have an existing vcall_visibility + // metadata on vtable definitions with public visibility. + if (GV.hasMetadata(LLVMContext::MD_type) && + GV.getVCallVisibility() == GlobalObject::VCallVisibilityPublic) + GV.setVCallVisibilityMetadata(GlobalObject::VCallVisibilityLinkageUnit); +} + +/// If whole program visibility asserted, then upgrade all public vcall +/// visibility metadata on vtable definition summaries to linkage unit +/// visibility in Module summary index (for ThinLTO). +void updateVCallVisibilityInIndex(ModuleSummaryIndex &Index, + bool WholeProgramVisibilityEnabledInLTO) { + if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) + return; + for (auto &P : Index) { + for (auto &S : P.second.SummaryList) { + auto *GVar = dyn_cast(S.get()); + if (!GVar || GVar->vTableFuncs().empty() || + GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic) + continue; + GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit); + } + } +} + void runWholeProgramDevirtOnIndex( ModuleSummaryIndex &Summary, std::set &ExportedGUIDs, std::map> &LocalWPDTargetsMap) { @@ -853,6 +911,12 @@ bool DevirtModule::tryFindVirtualCallTargets( if (!TM.Bits->GV->isConstant()) return false; + // We cannot perform whole program devirtualization analysis on a vtable + // with public LTO visibility. + if (TM.Bits->GV->getVCallVisibility() == + GlobalObject::VCallVisibilityPublic) + return false; + Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(), TM.Offset + ByteOffset, M); if (!Ptr) @@ -898,8 +962,13 @@ bool DevirtIndex::tryFindVirtualCallTargets( return false; LocalFound = true; } - if (!GlobalValue::isAvailableExternallyLinkage(S->linkage())) + if (!GlobalValue::isAvailableExternallyLinkage(S->linkage())) { VS = cast(S->getBaseObject()); + // We cannot perform whole program devirtualization analysis on a vtable + // with public LTO visibility. + if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic) + return false; + } } if (!VS->isLive()) continue; @@ -1843,6 +1912,12 @@ bool DevirtModule::run() { removeRedundantTypeTests(); + // We have lowered or deleted the type instrinsics, so we will no + // longer have enough information to reason about the liveness of virtual + // function pointers in GlobalDCE. + for (GlobalVariable &GV : M.globals()) + GV.eraseMetadata(LLVMContext::MD_vcall_visibility); + // The rest of the code is only necessary when exporting or during regular // LTO, so we are done. return true; @@ -1966,7 +2041,7 @@ bool DevirtModule::run() { for (VTableBits &B : Bits) rebuildGlobal(B); - // We have lowered or deleted the type checked load intrinsics, so we no + // We have lowered or deleted the type instrinsics, so we will no // longer have enough information to reason about the liveness of virtual // function pointers in GlobalDCE. for (GlobalVariable &GV : M.globals()) diff --git a/llvm/test/ThinLTO/X86/cache-typeid-resolutions.ll b/llvm/test/ThinLTO/X86/cache-typeid-resolutions.ll index 6618a6f280fe62..b52cceeb52cd2a 100644 --- a/llvm/test/ThinLTO/X86/cache-typeid-resolutions.ll +++ b/llvm/test/ThinLTO/X86/cache-typeid-resolutions.ll @@ -9,17 +9,17 @@ ; where both t and t-import are sensitive to typeid1's resolution ; so 4 distinct objects in total. ; RUN: rm -rf %t.cache -; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f1_actual,plx -r=%t.bc,f2,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc %t1.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f1_actual,plx -r=%t.bc,f2,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -r=%t1.bc,vt1,plx +; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f1_actual,plx -r=%t.bc,f2,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -whole-program-visibility +; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc %t1.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f1_actual,plx -r=%t.bc,f2,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -r=%t1.bc,vt1,plx -whole-program-visibility ; RUN: ls %t.cache | count 4 ; Three resolutions for typeid2: Indir, SingleImpl, UniqueRetVal ; where both t and t-import are sensitive to typeid2's resolution ; so 6 distinct objects in total. ; RUN: rm -rf %t.cache -; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f2,plx -r=%t.bc,f1_actual,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc %t2.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f2,plx -r=%t.bc,f1_actual,plx -r=%t2.bc,vt2,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc %t3.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f2,plx -r=%t.bc,f1_actual,plx -r=%t3.bc,vt2a,plx -r=%t3.bc,vt2b,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx +; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f2,plx -r=%t.bc,f1_actual,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -whole-program-visibility +; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc %t2.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f2,plx -r=%t.bc,f1_actual,plx -r=%t2.bc,vt2,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -whole-program-visibility +; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc %t3.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f2,plx -r=%t.bc,f1_actual,plx -r=%t3.bc,vt2a,plx -r=%t3.bc,vt2b,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -whole-program-visibility ; RUN: ls %t.cache | count 6 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/ThinLTO/X86/cfi-devirt.ll b/llvm/test/ThinLTO/X86/cfi-devirt.ll index dd83024e263443..311bed084deebf 100644 --- a/llvm/test/ThinLTO/X86/cfi-devirt.ll +++ b/llvm/test/ThinLTO/X86/cfi-devirt.ll @@ -6,6 +6,7 @@ ; Legacy PM ; RUN: llvm-lto2 run %t.o -save-temps -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -o %t3 \ ; RUN: -r=%t.o,test,px \ ; RUN: -r=%t.o,_ZN1A1nEi,p \ @@ -23,6 +24,7 @@ ; New PM ; RUN: llvm-lto2 run %t.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -o %t3 \ ; RUN: -r=%t.o,test,px \ ; RUN: -r=%t.o,_ZN1A1nEi,p \ @@ -46,6 +48,7 @@ ; to ensure it is being caught in the thin link. ; RUN: opt -thinlto-bc -o %t2.o %S/Inputs/empty.ll ; RUN: not llvm-lto2 run %t.o %t2.o -thinlto-distributed-indexes \ +; RUN: -whole-program-visibility \ ; RUN: -o %t3 \ ; RUN: -r=%t.o,test,px \ ; RUN: -r=%t.o,_ZN1A1nEi,p \ diff --git a/llvm/test/ThinLTO/X86/devirt-after-icp.ll b/llvm/test/ThinLTO/X86/devirt-after-icp.ll index af6eba77ba4e72..e4ac5549867e70 100644 --- a/llvm/test/ThinLTO/X86/devirt-after-icp.ll +++ b/llvm/test/ThinLTO/X86/devirt-after-icp.ll @@ -46,6 +46,7 @@ ; Legacy PM ; RUN: llvm-lto2 run %t.o -save-temps -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -o %t3 \ ; RUN: -r=%t.o,_Z3bazP1A,px \ ; RUN: -r=%t.o,_ZN1A3fooEv, \ @@ -64,6 +65,7 @@ ; New PM ; RUN: llvm-lto2 run %t.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -o %t3 \ ; RUN: -r=%t.o,_Z3bazP1A,px \ ; RUN: -r=%t.o,_ZN1A3fooEv, \ diff --git a/llvm/test/ThinLTO/X86/devirt.ll b/llvm/test/ThinLTO/X86/devirt.ll index eae8c69eb06135..c24685c5d6bf4e 100644 --- a/llvm/test/ThinLTO/X86/devirt.ll +++ b/llvm/test/ThinLTO/X86/devirt.ll @@ -35,6 +35,7 @@ ; Legacy PM, Index based WPD ; RUN: llvm-lto2 run %t2.o -save-temps -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -o %t3 \ ; RUN: -r=%t2.o,test,px \ ; RUN: -r=%t2.o,_ZN1A1nEi,p \ @@ -48,6 +49,7 @@ ; New PM, Index based WPD ; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -o %t3 \ ; RUN: -r=%t2.o,test,px \ ; RUN: -r=%t2.o,_ZN1A1nEi,p \ @@ -62,6 +64,7 @@ ; Legacy PM ; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436. ; RUN: llvm-lto2 run %t.o -save-temps -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -verify-machineinstrs=0 \ ; RUN: -o %t3 \ ; RUN: -r=%t.o,test,px \ @@ -84,6 +87,7 @@ ; New PM ; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436. ; RUN: llvm-lto2 run %t.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -verify-machineinstrs=0 \ ; RUN: -o %t3 \ ; RUN: -r=%t.o,test,px \ diff --git a/llvm/test/ThinLTO/X86/devirt2.ll b/llvm/test/ThinLTO/X86/devirt2.ll index 01eed382f24c91..a2cffa7be8c3e9 100644 --- a/llvm/test/ThinLTO/X86/devirt2.ll +++ b/llvm/test/ThinLTO/X86/devirt2.ll @@ -36,6 +36,7 @@ ; Legacy PM, Index based WPD ; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -wholeprogramdevirt-print-index-based \ ; RUN: -o %t5 \ ; RUN: -r=%t3.o,test,px \ @@ -59,6 +60,7 @@ ; New PM, Index based WPD ; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -wholeprogramdevirt-print-index-based \ ; RUN: -o %t5 \ ; RUN: -r=%t3.o,test,px \ @@ -92,6 +94,7 @@ ; Index based WPD, distributed backends ; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -use-new-pm \ +; RUN: -whole-program-visibility \ ; RUN: -thinlto-distributed-indexes -wholeprogramdevirt-print-index-based \ ; RUN: -o %t5 \ ; RUN: -r=%t3.o,test,px \ @@ -115,6 +118,7 @@ ; Legacy PM ; RUN: llvm-lto2 run %t1.o %t2.o -save-temps -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -o %t5 \ ; RUN: -r=%t1.o,test,px \ ; RUN: -r=%t1.o,_ZTV1B, \ @@ -150,6 +154,7 @@ ; New PM ; RUN: llvm-lto2 run %t1.o %t2.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -o %t5 \ ; RUN: -r=%t1.o,test,px \ ; RUN: -r=%t1.o,_ZTV1B, \ diff --git a/llvm/test/ThinLTO/X86/devirt_alias.ll b/llvm/test/ThinLTO/X86/devirt_alias.ll index 92aa2bcba917f0..78ebd471b010c3 100644 --- a/llvm/test/ThinLTO/X86/devirt_alias.ll +++ b/llvm/test/ThinLTO/X86/devirt_alias.ll @@ -7,6 +7,7 @@ ; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt_alias.ll ; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -wholeprogramdevirt-print-index-based \ ; RUN: -o %t5 \ ; RUN: -r=%t3.o,test,px \ diff --git a/llvm/test/ThinLTO/X86/devirt_available_externally.ll b/llvm/test/ThinLTO/X86/devirt_available_externally.ll index 128055f7022ec5..94ab7b75758748 100644 --- a/llvm/test/ThinLTO/X86/devirt_available_externally.ll +++ b/llvm/test/ThinLTO/X86/devirt_available_externally.ll @@ -18,6 +18,7 @@ ; EXTERNAL: gv: (name: "_ZTV1D", {{.*}} vTableFuncs: ((virtFunc: ; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -wholeprogramdevirt-print-index-based \ ; RUN: -o %t5 \ ; RUN: -r=%t3.o,test,px \ diff --git a/llvm/test/ThinLTO/X86/devirt_external_comdat_same_guid.ll b/llvm/test/ThinLTO/X86/devirt_external_comdat_same_guid.ll index 18482a051e2f3b..ce2f9b7175d6d2 100644 --- a/llvm/test/ThinLTO/X86/devirt_external_comdat_same_guid.ll +++ b/llvm/test/ThinLTO/X86/devirt_external_comdat_same_guid.ll @@ -8,6 +8,7 @@ ; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt_external_comdat_same_guid.ll ; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -wholeprogramdevirt-print-index-based \ ; RUN: -o %t5 \ ; RUN: -r=%t3.o,use_B,px \ diff --git a/llvm/test/ThinLTO/X86/devirt_promote.ll b/llvm/test/ThinLTO/X86/devirt_promote.ll index 563ed994157a29..e93b4192cf1804 100644 --- a/llvm/test/ThinLTO/X86/devirt_promote.ll +++ b/llvm/test/ThinLTO/X86/devirt_promote.ll @@ -10,6 +10,7 @@ ; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt_promote.ll ; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -wholeprogramdevirt-print-index-based \ ; RUN: -o %t5 \ ; RUN: -r=%t3.o,test,px \ diff --git a/llvm/test/ThinLTO/X86/devirt_promote_legacy.ll b/llvm/test/ThinLTO/X86/devirt_promote_legacy.ll index 79fde540c5971d..7216911a6bc664 100644 --- a/llvm/test/ThinLTO/X86/devirt_promote_legacy.ll +++ b/llvm/test/ThinLTO/X86/devirt_promote_legacy.ll @@ -10,6 +10,7 @@ ; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt_promote.ll ; RUN: llvm-lto -thinlto-action=run %t3.o %t4.o --thinlto-save-temps=%t5. \ +; RUN: -whole-program-visibility \ ; RUN: --pass-remarks=. \ ; RUN: --exported-symbol=test \ ; RUN: --exported-symbol=test2 \ diff --git a/llvm/test/ThinLTO/X86/devirt_single_hybrid.ll b/llvm/test/ThinLTO/X86/devirt_single_hybrid.ll index 5b2df6cf6f9be6..5966794152c167 100644 --- a/llvm/test/ThinLTO/X86/devirt_single_hybrid.ll +++ b/llvm/test/ThinLTO/X86/devirt_single_hybrid.ll @@ -5,6 +5,7 @@ ; RUN: opt -thinlto-bc -thinlto-split-lto-unit %p/Inputs/devirt_single_hybrid_foo.ll -o %t-foo.bc ; RUN: opt -thinlto-bc -thinlto-split-lto-unit %p/Inputs/devirt_single_hybrid_bar.ll -o %t-bar.bc ; RUN: llvm-lto2 run -save-temps %t-main.bc %t-foo.bc %t-bar.bc -pass-remarks=. -o %t \ +; RUN: -whole-program-visibility \ ; RUN: -r=%t-foo.bc,_Z3fooP1A,pl \ ; RUN: -r=%t-main.bc,main,plx \ ; RUN: -r=%t-main.bc,_Z3barv,l \ diff --git a/llvm/test/ThinLTO/X86/devirt_vcall_vis_hidden.ll b/llvm/test/ThinLTO/X86/devirt_vcall_vis_hidden.ll new file mode 100644 index 00000000000000..7a301491e05d13 --- /dev/null +++ b/llvm/test/ThinLTO/X86/devirt_vcall_vis_hidden.ll @@ -0,0 +1,143 @@ +; REQUIRES: x86-registered-target + +; Test devirtualization through the thin link and backend, when vtables +; have vcall_visibility metadata with public visibility. + +; Index based WPD +; Generate unsplit module with summary for ThinLTO index-based WPD. +; RUN: opt -thinlto-bc -o %t2.o %s +; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -o %t3 \ +; RUN: -r=%t2.o,test,px \ +; RUN: -r=%t2.o,_ZN1A1nEi,p \ +; RUN: -r=%t2.o,_ZN1B1fEi,p \ +; RUN: -r=%t2.o,_ZN1C1fEi,p \ +; RUN: -r=%t2.o,_ZN1D1mEi,p \ +; RUN: -r=%t2.o,_ZTV1B,px \ +; RUN: -r=%t2.o,_ZTV1C,px \ +; RUN: -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; Hybrid WPD +; Generate split module with summary for hybrid Thin/Regular LTO WPD. +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s +; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436. +; RUN: llvm-lto2 run %t.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -verify-machineinstrs=0 \ +; RUN: -o %t3 \ +; RUN: -r=%t.o,test,px \ +; RUN: -r=%t.o,_ZN1A1nEi,p \ +; RUN: -r=%t.o,_ZN1B1fEi,p \ +; RUN: -r=%t.o,_ZN1C1fEi,p \ +; RUN: -r=%t.o,_ZN1D1mEi,p \ +; RUN: -r=%t.o,_ZTV1B, \ +; RUN: -r=%t.o,_ZTV1C, \ +; RUN: -r=%t.o,_ZTV1D, \ +; RUN: -r=%t.o,_ZN1A1nEi, \ +; RUN: -r=%t.o,_ZN1B1fEi, \ +; RUN: -r=%t.o,_ZN1C1fEi, \ +; RUN: -r=%t.o,_ZN1D1mEi, \ +; RUN: -r=%t.o,_ZTV1B,px \ +; RUN: -r=%t.o,_ZTV1C,px \ +; RUN: -r=%t.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK --dump-input=fail +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; Regular LTO WPD +; RUN: opt -o %t4.o %s +; RUN: llvm-lto2 run %t4.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ +; RUN: -o %t5 \ +; RUN: -r=%t4.o,test,px \ +; RUN: -r=%t4.o,_ZN1A1nEi,p \ +; RUN: -r=%t4.o,_ZN1B1fEi,p \ +; RUN: -r=%t4.o,_ZN1C1fEi,p \ +; RUN: -r=%t4.o,_ZN1D1mEi,p \ +; RUN: -r=%t4.o,_ZTV1B,px \ +; RUN: -r=%t4.o,_ZTV1C,px \ +; RUN: -r=%t4.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t5.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; REMARK-DAG: single-impl: devirtualized a call to _ZN1A1nEi +; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } +%struct.C = type { %struct.A } +%struct.D = type { i32 (...)** } + +@_ZTV1B = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1, !vcall_visibility !5 +@_ZTV1C = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.C*, i32)* @_ZN1C1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !2, !vcall_visibility !5 +@_ZTV1D = constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3, !vcall_visibility !5 + + +; CHECK-IR-LABEL: define i32 @test +define i32 @test(%struct.A* %obj, %struct.D* %obj2, i32 %a) { +entry: + %0 = bitcast %struct.A* %obj to i8*** + %vtable = load i8**, i8*** %0 + %1 = bitcast i8** %vtable to i8* + %p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A") + call void @llvm.assume(i1 %p) + %fptrptr = getelementptr i8*, i8** %vtable, i32 1 + %2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)** + %fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8 + + ; Check that the call was devirtualized. + ; CHECK-IR: %call = tail call i32 @_ZN1A1nEi + %call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a) + + %3 = bitcast i8** %vtable to i32 (%struct.A*, i32)** + %fptr22 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %3, align 8 + + ; We still have to call it as virtual. + ; CHECK-IR: %call3 = tail call i32 %fptr22 + %call3 = tail call i32 %fptr22(%struct.A* nonnull %obj, i32 %call) + + %4 = bitcast %struct.D* %obj2 to i8*** + %vtable2 = load i8**, i8*** %4 + %5 = bitcast i8** %vtable2 to i8* + %p2 = call i1 @llvm.type.test(i8* %5, metadata !4) + call void @llvm.assume(i1 %p2) + + %6 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)** + %fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %6, align 8 + + ; Check that the call was devirtualized. + ; CHECK-IR: %call4 = tail call i32 @_ZN1D1mEi + %call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 %call3) + ret i32 %call4 +} +; CHECK-IR-LABEL: ret i32 +; CHECK-IR-LABEL: } + +declare i1 @llvm.type.test(i8*, metadata) +declare void @llvm.assume(i1) + +define i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1C1fEi(%struct.C* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 { + ret i32 0; +} + +; Make sure we don't inline or otherwise optimize out the direct calls. +attributes #0 = { noinline optnone } + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTS1B"} +!2 = !{i64 16, !"_ZTS1C"} +!3 = !{i64 16, !4} +!4 = distinct !{} +!5 = !{i64 1} diff --git a/llvm/test/ThinLTO/X86/devirt_vcall_vis_public.ll b/llvm/test/ThinLTO/X86/devirt_vcall_vis_public.ll new file mode 100644 index 00000000000000..468237bea4c805 --- /dev/null +++ b/llvm/test/ThinLTO/X86/devirt_vcall_vis_public.ll @@ -0,0 +1,215 @@ +; REQUIRES: x86-registered-target + +; Test devirtualization through the thin link and backend, when vtables +; have vcall_visibility metadata with public visibility. + +; Index based WPD +; Generate unsplit module with summary for ThinLTO index-based WPD. +; RUN: opt -thinlto-bc -o %t2.o %s +; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ +; RUN: -o %t3 \ +; RUN: -r=%t2.o,test,px \ +; RUN: -r=%t2.o,_ZN1A1nEi,p \ +; RUN: -r=%t2.o,_ZN1B1fEi,p \ +; RUN: -r=%t2.o,_ZN1C1fEi,p \ +; RUN: -r=%t2.o,_ZN1D1mEi,p \ +; RUN: -r=%t2.o,_ZTV1B,px \ +; RUN: -r=%t2.o,_ZTV1C,px \ +; RUN: -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; Hybrid WPD +; Generate split module with summary for hybrid Thin/Regular LTO WPD. +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s +; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436. +; RUN: llvm-lto2 run %t.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ +; RUN: -verify-machineinstrs=0 \ +; RUN: -o %t3 \ +; RUN: -r=%t.o,test,px \ +; RUN: -r=%t.o,_ZN1A1nEi,p \ +; RUN: -r=%t.o,_ZN1B1fEi,p \ +; RUN: -r=%t.o,_ZN1C1fEi,p \ +; RUN: -r=%t.o,_ZN1D1mEi,p \ +; RUN: -r=%t.o,_ZTV1B, \ +; RUN: -r=%t.o,_ZTV1C, \ +; RUN: -r=%t.o,_ZTV1D, \ +; RUN: -r=%t.o,_ZN1A1nEi, \ +; RUN: -r=%t.o,_ZN1B1fEi, \ +; RUN: -r=%t.o,_ZN1C1fEi, \ +; RUN: -r=%t.o,_ZN1D1mEi, \ +; RUN: -r=%t.o,_ZTV1B,px \ +; RUN: -r=%t.o,_ZTV1C,px \ +; RUN: -r=%t.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK --dump-input=fail +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; Regular LTO WPD +; RUN: opt -o %t4.o %s +; RUN: llvm-lto2 run %t4.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ +; RUN: -o %t5 \ +; RUN: -r=%t4.o,test,px \ +; RUN: -r=%t4.o,_ZN1A1nEi,p \ +; RUN: -r=%t4.o,_ZN1B1fEi,p \ +; RUN: -r=%t4.o,_ZN1C1fEi,p \ +; RUN: -r=%t4.o,_ZN1D1mEi,p \ +; RUN: -r=%t4.o,_ZTV1B,px \ +; RUN: -r=%t4.o,_ZTV1C,px \ +; RUN: -r=%t4.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t5.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; REMARK-DAG: single-impl: devirtualized a call to _ZN1A1nEi +; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi + +; Try everything again but without -whole-program-visibility to confirm +; WPD fails + +; Index based WPD +; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -o %t3 \ +; RUN: -r=%t2.o,test,px \ +; RUN: -r=%t2.o,_ZN1A1nEi,p \ +; RUN: -r=%t2.o,_ZN1B1fEi,p \ +; RUN: -r=%t2.o,_ZN1C1fEi,p \ +; RUN: -r=%t2.o,_ZN1D1mEi,p \ +; RUN: -r=%t2.o,_ZTV1B,px \ +; RUN: -r=%t2.o,_ZTV1C,px \ +; RUN: -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR + +; Hybrid WPD +; RUN: llvm-lto2 run %t.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -verify-machineinstrs=0 \ +; RUN: -o %t3 \ +; RUN: -r=%t.o,test,px \ +; RUN: -r=%t.o,_ZN1A1nEi,p \ +; RUN: -r=%t.o,_ZN1B1fEi,p \ +; RUN: -r=%t.o,_ZN1C1fEi,p \ +; RUN: -r=%t.o,_ZN1D1mEi,p \ +; RUN: -r=%t.o,_ZTV1B, \ +; RUN: -r=%t.o,_ZTV1C, \ +; RUN: -r=%t.o,_ZTV1D, \ +; RUN: -r=%t.o,_ZN1A1nEi, \ +; RUN: -r=%t.o,_ZN1B1fEi, \ +; RUN: -r=%t.o,_ZN1C1fEi, \ +; RUN: -r=%t.o,_ZN1D1mEi, \ +; RUN: -r=%t.o,_ZTV1B,px \ +; RUN: -r=%t.o,_ZTV1C,px \ +; RUN: -r=%t.o,_ZTV1D,px 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR + +; Regular LTO WPD +; RUN: llvm-lto2 run %t4.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -o %t5 \ +; RUN: -r=%t4.o,test,px \ +; RUN: -r=%t4.o,_ZN1A1nEi,p \ +; RUN: -r=%t4.o,_ZN1B1fEi,p \ +; RUN: -r=%t4.o,_ZN1C1fEi,p \ +; RUN: -r=%t4.o,_ZN1D1mEi,p \ +; RUN: -r=%t4.o,_ZTV1B,px \ +; RUN: -r=%t4.o,_ZTV1C,px \ +; RUN: -r=%t4.o,_ZTV1D,px 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty +; RUN: llvm-dis %t5.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR + +; Try index-based WPD again with both -whole-program-visibility and +; -disable-whole-program-visibility to confirm the latter overrides +; the former and that WPD fails. +; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ +; RUN: -disable-whole-program-visibility \ +; RUN: -o %t3 \ +; RUN: -r=%t2.o,test,px \ +; RUN: -r=%t2.o,_ZN1A1nEi,p \ +; RUN: -r=%t2.o,_ZN1B1fEi,p \ +; RUN: -r=%t2.o,_ZN1C1fEi,p \ +; RUN: -r=%t2.o,_ZN1D1mEi,p \ +; RUN: -r=%t2.o,_ZTV1B,px \ +; RUN: -r=%t2.o,_ZTV1C,px \ +; RUN: -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } +%struct.C = type { %struct.A } +%struct.D = type { i32 (...)** } + +@_ZTV1B = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1, !vcall_visibility !5 +@_ZTV1C = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.C*, i32)* @_ZN1C1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !2, !vcall_visibility !5 +@_ZTV1D = constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3, !vcall_visibility !5 + + +; CHECK-IR-LABEL: define i32 @test +define i32 @test(%struct.A* %obj, %struct.D* %obj2, i32 %a) { +entry: + %0 = bitcast %struct.A* %obj to i8*** + %vtable = load i8**, i8*** %0 + %1 = bitcast i8** %vtable to i8* + %p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A") + call void @llvm.assume(i1 %p) + %fptrptr = getelementptr i8*, i8** %vtable, i32 1 + %2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)** + %fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8 + + ; Check that the call was devirtualized. + ; CHECK-IR: %call = tail call i32 @_ZN1A1nEi + ; CHECK-NODEVIRT-IR: %call = tail call i32 %fptr1 + %call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a) + + %3 = bitcast i8** %vtable to i32 (%struct.A*, i32)** + %fptr22 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %3, align 8 + + ; We still have to call it as virtual. + ; CHECK-IR: %call3 = tail call i32 %fptr22 + ; CHECK-NODEVIRT-IR: %call3 = tail call i32 %fptr22 + %call3 = tail call i32 %fptr22(%struct.A* nonnull %obj, i32 %call) + + %4 = bitcast %struct.D* %obj2 to i8*** + %vtable2 = load i8**, i8*** %4 + %5 = bitcast i8** %vtable2 to i8* + %p2 = call i1 @llvm.type.test(i8* %5, metadata !4) + call void @llvm.assume(i1 %p2) + + %6 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)** + %fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %6, align 8 + + ; Check that the call was devirtualized. + ; CHECK-IR: %call4 = tail call i32 @_ZN1D1mEi + ; CHECK-NODEVIRT-IR: %call4 = tail call i32 %fptr33 + %call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 %call3) + ret i32 %call4 +} +; CHECK-IR-LABEL: ret i32 +; CHECK-IR-LABEL: } + +declare i1 @llvm.type.test(i8*, metadata) +declare void @llvm.assume(i1) + +define i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1C1fEi(%struct.C* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 { + ret i32 0; +} + +; Make sure we don't inline or otherwise optimize out the direct calls. +attributes #0 = { noinline optnone } + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTS1B"} +!2 = !{i64 16, !"_ZTS1C"} +!3 = !{i64 16, !4} +!4 = distinct !{} +!5 = !{i64 0} diff --git a/llvm/test/Transforms/WholeProgramDevirt/bad-read-from-vtable.ll b/llvm/test/Transforms/WholeProgramDevirt/bad-read-from-vtable.ll index e5d0e74b22e2d8..14de02a23c0649 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/bad-read-from-vtable.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/bad-read-from-vtable.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel-threshold.ll b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel-threshold.ll index 91cd4e419b661f..1e7d38d9bdbfd7 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel-threshold.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel-threshold.ll @@ -1,8 +1,8 @@ -; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -wholeprogramdevirt-branch-funnel-threshold=1 -S -o - %s | not grep @llvm.icall.branch.funnel | count 0 +; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -wholeprogramdevirt-branch-funnel-threshold=1 -S -o - %s | not grep @llvm.icall.branch.funnel | count 0 -; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -wholeprogramdevirt-branch-funnel-threshold=10 -S -o - %s | grep @llvm.icall.branch.funnel | count 4 +; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -wholeprogramdevirt-branch-funnel-threshold=10 -S -o - %s | grep @llvm.icall.branch.funnel | count 4 -; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -wholeprogramdevirt-branch-funnel-threshold=100 -S -o - %s | grep @llvm.icall.branch.funnel | count 5 +; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -wholeprogramdevirt-branch-funnel-threshold=100 -S -o - %s | grep @llvm.icall.branch.funnel | count 5 target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll index db76080725bcde..32d964819fee5c 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll @@ -1,9 +1,9 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck --check-prefixes=CHECK,RETP %s -; RUN: sed -e 's,+retpoline,-retpoline,g' %s | opt -S -wholeprogramdevirt | FileCheck --check-prefixes=CHECK,NORETP %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck --check-prefixes=CHECK,RETP %s +; RUN: sed -e 's,+retpoline,-retpoline,g' %s | opt -S -wholeprogramdevirt -whole-program-visibility | FileCheck --check-prefixes=CHECK,NORETP %s -; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=CHECK,RETP %s +; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=CHECK,RETP %s -; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -O3 -S -o - %s | FileCheck --check-prefixes=CHECK %s +; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -O3 -S -o - %s | FileCheck --check-prefixes=CHECK %s ; RUN: FileCheck --check-prefix=SUMMARY %s < %t diff --git a/llvm/test/Transforms/WholeProgramDevirt/constant-arg.ll b/llvm/test/Transforms/WholeProgramDevirt/constant-arg.ll index f65e4132738203..2f26a2502e9c0b 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/constant-arg.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/constant-arg.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s -; RUN: opt -S -passes=wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s +; RUN: opt -S -passes=wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll index f4ef8824e2e8b5..7302238aa7cdca 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl.ll b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl.ll index 9f631e94cf545e..5b0b5aabdae49b 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl2.ll b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl2.ll index 4c45c4081df10b..63ccfb833d4560 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl2.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl2.ll @@ -2,7 +2,7 @@ ; loading/saving index from/to bitcode ; RUN: llvm-as %s -o %t.bc ; RUN: llvm-as %p/Inputs/devirt-single-impl2-index.ll -o %t.index.bc -; RUN: opt %s -S -wholeprogramdevirt -wholeprogramdevirt-read-summary=%t.index.bc \ +; RUN: opt %s -S -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-read-summary=%t.index.bc \ ; RUN: -wholeprogramdevirt-summary-action=export \ ; RUN: -wholeprogramdevirt-write-summary=%t2.index.bc -o /dev/null ; RUN: llvm-dis %t2.index.bc -o - | FileCheck %s @@ -10,7 +10,7 @@ ; Check that opt fails to use summaries which don't contain regular LTO module ; when performing export. ; RUN: llvm-as %p/Inputs/devirt-bad-index.ll -o %t-bad.index.bc -; RUN: not opt %s -S -wholeprogramdevirt -wholeprogramdevirt-read-summary=%t-bad.index.bc \ +; RUN: not opt %s -S -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-read-summary=%t-bad.index.bc \ ; RUN: -wholeprogramdevirt-summary-action=export -o /dev/null 2>&1 | FileCheck %s --check-prefix=MISSING-MODULE ; Check single impl devirtulation in summary diff --git a/llvm/test/Transforms/WholeProgramDevirt/expand-check.ll b/llvm/test/Transforms/WholeProgramDevirt/expand-check.ll index 4effaba08b25b9..3ea078a183e862 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/expand-check.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/expand-check.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s ; Test that we correctly expand the llvm.type.checked.load intrinsic in cases ; where we cannot devirtualize. diff --git a/llvm/test/Transforms/WholeProgramDevirt/export-nothing.ll b/llvm/test/Transforms/WholeProgramDevirt/export-nothing.ll index 4707eaa17ead34..64a714dd08d665 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/export-nothing.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/export-nothing.ll @@ -1,4 +1,4 @@ -; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-write-summary=%t -o /dev/null %s +; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-write-summary=%t -o /dev/null %s ; RUN: FileCheck %s < %t ; CHECK: --- diff --git a/llvm/test/Transforms/WholeProgramDevirt/export-single-impl.ll b/llvm/test/Transforms/WholeProgramDevirt/export-single-impl.ll index cbba1a4d1644cc..33ff9e1afe50f6 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/export-single-impl.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/export-single-impl.ll @@ -1,4 +1,4 @@ -; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck %s +; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck %s ; RUN: FileCheck --check-prefix=SUMMARY %s < %t ; SUMMARY: TypeIdMap: diff --git a/llvm/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll b/llvm/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll index 43adb90d69f1c2..cb2fddd75d1d0e 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll @@ -1,4 +1,4 @@ -; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck %s +; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck %s ; RUN: FileCheck --check-prefix=SUMMARY %s < %t ; SUMMARY-NOT: TypeTests: diff --git a/llvm/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll b/llvm/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll index 4260a2e570d16a..0f780a3873687c 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll @@ -1,4 +1,4 @@ -; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck %s +; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck %s ; RUN: FileCheck --check-prefix=SUMMARY %s < %t ; SUMMARY-NOT: TypeTests: diff --git a/llvm/test/Transforms/WholeProgramDevirt/export-unsuccessful-checked.ll b/llvm/test/Transforms/WholeProgramDevirt/export-unsuccessful-checked.ll index 3132444a9f36aa..dd16fa052afc43 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/export-unsuccessful-checked.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/export-unsuccessful-checked.ll @@ -1,4 +1,4 @@ -; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -o /dev/null %s +; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -o /dev/null %s ; RUN: FileCheck %s < %t ; CHECK: TypeTests: [ 15427464259790519041, 17525413373118030901 ] diff --git a/llvm/test/Transforms/WholeProgramDevirt/export-vcp.ll b/llvm/test/Transforms/WholeProgramDevirt/export-vcp.ll index 5982ad4ec58f1d..eb7b36e87dd62b 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/export-vcp.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/export-vcp.ll @@ -1,7 +1,7 @@ -; RUN: opt -mtriple=x86_64-unknown-linux-gnu -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=CHECK,X86 %s +; RUN: opt -mtriple=x86_64-unknown-linux-gnu -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=CHECK,X86 %s ; RUN: FileCheck --check-prefixes=SUMMARY,SUMMARY-X86 %s < %t -; RUN: opt -mtriple=armv7-unknown-linux-gnu -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=CHECK,ARM %s +; RUN: opt -mtriple=armv7-unknown-linux-gnu -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=CHECK,ARM %s ; RUN: FileCheck --check-prefixes=SUMMARY,SUMMARY-ARM %s < %t target datalayout = "e-p:64:64" diff --git a/llvm/test/Transforms/WholeProgramDevirt/non-constant-vtable.ll b/llvm/test/Transforms/WholeProgramDevirt/non-constant-vtable.ll index ecc8ad0e7c73e2..1a38efa01ec9ee 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/non-constant-vtable.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/non-constant-vtable.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s ; CHECK-NOT: devirtualized call diff --git a/llvm/test/Transforms/WholeProgramDevirt/pointer-vtable.ll b/llvm/test/Transforms/WholeProgramDevirt/pointer-vtable.ll index 5e76a5a7dde476..443a850373d1d8 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/pointer-vtable.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/pointer-vtable.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/soa-vtable.ll b/llvm/test/Transforms/WholeProgramDevirt/soa-vtable.ll index 3b6afc52e5dc89..86c9a9938512cd 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/soa-vtable.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/soa-vtable.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/struct-vtable.ll b/llvm/test/Transforms/WholeProgramDevirt/struct-vtable.ll index 81e41d466941be..8afa1d99c508a6 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/struct-vtable.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/struct-vtable.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/uniform-retval-invoke.ll b/llvm/test/Transforms/WholeProgramDevirt/uniform-retval-invoke.ll index 8fea9bc7b24002..b9d205939364a9 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/uniform-retval-invoke.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/uniform-retval-invoke.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll b/llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll index ef3a7e49b52c2b..7626aba24c1ab5 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/unique-retval.ll b/llvm/test/Transforms/WholeProgramDevirt/unique-retval.ll index 4452bb75d03f8e..f03c07d24de653 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/unique-retval.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/unique-retval.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/vcp-accesses-memory.ll b/llvm/test/Transforms/WholeProgramDevirt/vcp-accesses-memory.ll index ca76383c49434c..07a9ac7e2300ac 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/vcp-accesses-memory.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/vcp-accesses-memory.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s -; RUN: opt -S -passes=wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s +; RUN: opt -S -passes=wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/vcp-decl.ll b/llvm/test/Transforms/WholeProgramDevirt/vcp-decl.ll index 1c4e2fbe97aa4c..35be7e4cabd336 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/vcp-decl.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/vcp-decl.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/vcp-no-this.ll b/llvm/test/Transforms/WholeProgramDevirt/vcp-no-this.ll index ce76c8e6797e38..0c3c4e9b2ff53a 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/vcp-no-this.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/vcp-no-this.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/vcp-non-constant-arg.ll b/llvm/test/Transforms/WholeProgramDevirt/vcp-non-constant-arg.ll index cc2ff33296a998..3580663b7f06e0 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/vcp-non-constant-arg.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/vcp-non-constant-arg.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/vcp-too-wide-ints.ll b/llvm/test/Transforms/WholeProgramDevirt/vcp-too-wide-ints.ll index c24c3b4be68323..a0de6d278331bb 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/vcp-too-wide-ints.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/vcp-too-wide-ints.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/vcp-type-mismatch.ll b/llvm/test/Transforms/WholeProgramDevirt/vcp-type-mismatch.ll index 7016263f8f7ba2..e96323e3d782d9 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/vcp-type-mismatch.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/vcp-type-mismatch.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s ; Test that we correctly handle function type mismatches in argument counts ; and bitwidths. We handle an argument count mismatch by refusing diff --git a/llvm/test/Transforms/WholeProgramDevirt/vcp-uses-this.ll b/llvm/test/Transforms/WholeProgramDevirt/vcp-uses-this.ll index 542402e1657727..1c5b0ae05b18f2 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/vcp-uses-this.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/vcp-uses-this.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-begin.ll b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-begin.ll index 6e55235ebfcd82..22426dfc4497e6 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-begin.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-begin.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll index 3299f7bce65bca..b7537f26e4d054 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll index 5ab9571a0caadf..b3cb4de7240ead 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/vtable-decl.ll b/llvm/test/Transforms/WholeProgramDevirt/vtable-decl.ll index e56170a4997f2c..cdb3856c70f85a 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/vtable-decl.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/vtable-decl.ll @@ -1,5 +1,5 @@ ; Check that we don't crash when processing declaration with type metadata -; RUN: opt -S -wholeprogramdevirt %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-none-linux-gnu" diff --git a/llvm/test/tools/gold/X86/devirt_vcall_vis_public.ll b/llvm/test/tools/gold/X86/devirt_vcall_vis_public.ll new file mode 100644 index 00000000000000..53cd3fd4e361aa --- /dev/null +++ b/llvm/test/tools/gold/X86/devirt_vcall_vis_public.ll @@ -0,0 +1,148 @@ +; Test that plugin option whole-program-visibility enables devirtualization. + +; Index based WPD +; Generate unsplit module with summary for ThinLTO index-based WPD. +; RUN: opt -thinlto-bc -o %t2.o %s +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \ +; RUN: --plugin-opt=whole-program-visibility \ +; RUN: --plugin-opt=save-temps \ +; RUN: --plugin-opt=-pass-remarks=. \ +; RUN: %t2.o -o %t3 \ +; RUN: --export-dynamic 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t2.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; Hybrid WPD +; Generate split module with summary for hybrid Thin/Regular LTO WPD. +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \ +; RUN: --plugin-opt=whole-program-visibility \ +; RUN: --plugin-opt=save-temps \ +; RUN: --plugin-opt=-pass-remarks=. \ +; RUN: %t.o -o %t3 \ +; RUN: --export-dynamic 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; Regular LTO WPD +; RUN: opt -o %t4.o %s +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \ +; RUN: --plugin-opt=whole-program-visibility \ +; RUN: --plugin-opt=save-temps \ +; RUN: --plugin-opt=-pass-remarks=. \ +; RUN: %t4.o -o %t3 \ +; RUN: --export-dynamic 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t3.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; REMARK-DAG: single-impl: devirtualized a call to _ZN1A1nEi +; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi + +; Try everything again but without -whole-program-visibility to confirm +; WPD fails + +; Index based WPD +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \ +; RUN: --plugin-opt=save-temps \ +; RUN: --plugin-opt=-pass-remarks=. \ +; RUN: %t2.o -o %t3 \ +; RUN: --export-dynamic 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty +; RUN: llvm-dis %t2.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR + +; Hybrid WPD +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \ +; RUN: --plugin-opt=save-temps \ +; RUN: --plugin-opt=-pass-remarks=. \ +; RUN: %t.o -o %t3 \ +; RUN: --export-dynamic 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty +; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR + +; Regular LTO WPD +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \ +; RUN: --plugin-opt=save-temps \ +; RUN: --plugin-opt=-pass-remarks=. \ +; RUN: %t4.o -o %t3 \ +; RUN: --export-dynamic 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty +; RUN: llvm-dis %t3.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } +%struct.C = type { %struct.A } +%struct.D = type { i32 (...)** } + +@_ZTV1B = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1, !vcall_visibility !5 +@_ZTV1C = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.C*, i32)* @_ZN1C1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !2, !vcall_visibility !5 +@_ZTV1D = constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3, !vcall_visibility !5 + + +; CHECK-IR-LABEL: define dso_local i32 @_start +define i32 @_start(%struct.A* %obj, %struct.D* %obj2, i32 %a) { +entry: + %0 = bitcast %struct.A* %obj to i8*** + %vtable = load i8**, i8*** %0 + %1 = bitcast i8** %vtable to i8* + %p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A") + call void @llvm.assume(i1 %p) + %fptrptr = getelementptr i8*, i8** %vtable, i32 1 + %2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)** + %fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8 + + ; Check that the call was devirtualized. + ; CHECK-IR: %call = tail call i32 @_ZN1A1nEi + ; CHECK-NODEVIRT-IR: %call = tail call i32 %fptr1 + %call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a) + + %3 = bitcast i8** %vtable to i32 (%struct.A*, i32)** + %fptr22 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %3, align 8 + + ; We still have to call it as virtual. + ; CHECK-IR: %call3 = tail call i32 %fptr22 + ; CHECK-NODEVIRT-IR: %call3 = tail call i32 %fptr22 + %call3 = tail call i32 %fptr22(%struct.A* nonnull %obj, i32 %call) + + %4 = bitcast %struct.D* %obj2 to i8*** + %vtable2 = load i8**, i8*** %4 + %5 = bitcast i8** %vtable2 to i8* + %p2 = call i1 @llvm.type.test(i8* %5, metadata !4) + call void @llvm.assume(i1 %p2) + + %6 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)** + %fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %6, align 8 + + ; Check that the call was devirtualized. + ; CHECK-IR: %call4 = tail call i32 @_ZN1D1mEi + ; CHECK-NODEVIRT-IR: %call4 = tail call i32 %fptr33 + %call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 %call3) + ret i32 %call4 +} +; CHECK-IR-LABEL: ret i32 +; CHECK-IR-LABEL: } + +declare i1 @llvm.type.test(i8*, metadata) +declare void @llvm.assume(i1) + +define i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1C1fEi(%struct.C* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 { + ret i32 0; +} + +; Make sure we don't inline or otherwise optimize out the direct calls. +attributes #0 = { noinline optnone } + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTS1B"} +!2 = !{i64 16, !"_ZTS1C"} +!3 = !{i64 16, !4} +!4 = distinct !{} +!5 = !{i64 0} diff --git a/llvm/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp index 406079dad307e9..93e9b60873ebf6 100644 --- a/llvm/tools/gold/gold-plugin.cpp +++ b/llvm/tools/gold/gold-plugin.cpp @@ -204,6 +204,8 @@ namespace options { static std::string dwo_dir; /// Statistics output filename. static std::string stats_file; + // Asserts that LTO link has whole program visibility + static bool whole_program_visibility = false; // Optimization remarks filename, accepted passes and hotness options static std::string RemarksFilename; @@ -283,6 +285,8 @@ namespace options { new_pass_manager = true; } else if (opt == "debug-pass-manager") { debug_pass_manager = true; + } else if (opt == "whole-program-visibility") { + whole_program_visibility = true; } else if (opt.startswith("dwo_dir=")) { dwo_dir = opt.substr(strlen("dwo_dir=")); } else if (opt.startswith("opt-remarks-filename=")) { @@ -926,6 +930,8 @@ static std::unique_ptr createLTO(IndexWriteCallback OnIndexWrite, // Debug new pass manager if requested Conf.DebugPassManager = options::debug_pass_manager; + Conf.HasWholeProgramVisibility = options::whole_program_visibility; + Conf.StatsFile = options::stats_file; return std::make_unique(std::move(Conf), Backend, options::ParallelCodeGenParallelismLevel); diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index 75a6cdc3892b33..82cf47d9ebd605 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -54,6 +54,7 @@ #include "llvm/Transforms/Coroutines.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Debugify.h" #include @@ -625,6 +626,13 @@ int main(int argc, char **argv) { return 1; } + // Enable testing of whole program devirtualization on this module by invoking + // the facility for updating public visibility to linkage unit visibility when + // specified by an internal option. This is normally done during LTO which is + // not performed via opt. + updateVCallVisibilityInModule(*M, + /* WholeProgramVisibilityEnabledInLTO */ false); + // Figure out what stream we are supposed to write to... std::unique_ptr Out; std::unique_ptr ThinLinkOut;