65 changes: 40 additions & 25 deletions compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
// ld.lld: error: /lib/../lib64/Scrt1.o: ABI version 1 is not supported
// UNSUPPORTED: ppc && host-byteorder-big-endian

// RUN: rm -rf %t && mkdir %t && cd %t
// RUN: rm -rf %t && mkdir %t && split-file %s %t && cd %t

// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -fprofile-generate=. -mllvm -enable-vtable-value-profiling %s -o test
// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -fprofile-generate=. -mllvm -enable-vtable-value-profiling lib.cpp main.cpp -o test
// RUN: env LLVM_PROFILE_FILE=test.profraw ./test

// Show vtable profiles from raw profile.
Expand Down Expand Up @@ -37,23 +37,23 @@
// COMMON-NEXT: Number of instrumented vtables: 2
// RAW: Indirect Target Results:
// RAW-NEXT: [ 0, _ZN8Derived14funcEii, 50 ] (25.00%)
// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%)
// RAW-NEXT: [ 0, {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%)
// RAW-NEXT: [ 1, _ZN8Derived1D0Ev, 250 ] (25.00%)
// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%)
// RAW-NEXT: [ 1, {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%)
// RAW-NEXT: VTable Results:
// RAW-NEXT: [ 0, _ZTV8Derived1, 50 ] (25.00%)
// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%)
// RAW-NEXT: [ 0, {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%)
// RAW-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%)
// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
// RAW-NEXT: [ 1, {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
// INDEXED: Indirect Target Results:
// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%)
// INDEXED-NEXT: [ 0, {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%)
// INDEXED-NEXT: [ 0, _ZN8Derived14funcEii, 50 ] (25.00%)
// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%)
// INDEXED-NEXT: [ 1, {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%)
// INDEXED-NEXT: [ 1, _ZN8Derived1D0Ev, 250 ] (25.00%)
// INDEXED-NEXT: VTable Results:
// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%)
// INDEXED-NEXT: [ 0, {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%)
// INDEXED-NEXT: [ 0, _ZTV8Derived1, 50 ] (25.00%)
// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
// INDEXED-NEXT: [ 1, {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
// INDEXED-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%)
// COMMON: Instrumentation level: IR entry_first = 0
// COMMON-NEXT: Functions shown: 1
Expand Down Expand Up @@ -93,27 +93,27 @@
// ICTEXT: # NumValueSites:
// ICTEXT: 2
// ICTEXT: 2
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii:150
// ICTEXT: {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived24funcEii:150
// ICTEXT: _ZN8Derived14funcEii:50
// ICTEXT: 2
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev:750
// ICTEXT: {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev:750
// ICTEXT: _ZN8Derived1D0Ev:250
// ICTEXT: # ValueKind = IPVK_VTableTarget:
// ICTEXT: 2
// ICTEXT: # NumValueSites:
// ICTEXT: 2
// ICTEXT: 2
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:150
// ICTEXT: {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E:150
// ICTEXT: _ZTV8Derived1:50
// ICTEXT: 2
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750
// ICTEXT: {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750
// ICTEXT: _ZTV8Derived1:250

// When vtable value profiles exist, pgo-instr-use pass should annotate them
// even if `-enable-vtable-value-profiling` is not explicitly on.
// RUN: %clangxx -m64 -fprofile-use=test.profdata -fuse-ld=lld -O2 \
// RUN: -mllvm -print-after=pgo-instr-use -mllvm -filter-print-funcs=main \
// RUN: -mllvm -print-module-scope %s 2>&1 | FileCheck %s --check-prefix=ANNOTATE
// RUN: -mllvm -print-module-scope lib.cpp main.cpp 2>&1 | FileCheck %s --check-prefix=ANNOTATE

// ANNOTATE-NOT: Inconsistent number of value sites
// ANNOTATE: !{!"VP", i32 2
Expand All @@ -122,7 +122,7 @@
// if `-icp-max-num-vtables` is set to zero.
// RUN: %clangxx -m64 -fprofile-use=test.profdata -fuse-ld=lld -O2 \
// RUN: -mllvm -icp-max-num-vtables=0 -mllvm -print-after=pgo-instr-use \
// RUN: -mllvm -filter-print-funcs=main -mllvm -print-module-scope %s 2>&1 | \
// RUN: -mllvm -filter-print-funcs=main -mllvm -print-module-scope lib.cpp main.cpp 2>&1 | \
// RUN: FileCheck %s --check-prefix=OMIT

// OMIT: Inconsistent number of value sites
Expand All @@ -141,28 +141,29 @@
// RUN: -g -flto=thin -fwhole-program-vtables -fno-split-lto-unit -O2 \
// RUN: -mllvm -enable-vtable-value-profiling -Wl,-mllvm,-enable-vtable-value-profiling \
// RUN: -mllvm -enable-vtable-profile-use \
// RUN: -Wl,-plugin-opt,-import-assume-unique-local \
// RUN: -Wl,-mllvm,-enable-vtable-profile-use -Rpass=pgo-icall-prom \
// RUN: -Wl,-mllvm,-print-after=pgo-icall-prom \
// RUN: -Wl,-mllvm,-filter-print-funcs=main %s 2>&1 \
// RUN: -Wl,-mllvm,-filter-print-funcs=main lib.cpp main.cpp 2>&1 \
// RUN: | FileCheck %s --check-prefixes=REMARK,IR --implicit-check-not="!VP"

// For the indirect call site `ptr->func`
// REMARK: instrprof-vtable-value-prof.cpp:226:19: Promote indirect call to _ZN12_GLOBAL__N_18Derived24funcEii with count 150 out of 200, sink 1 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E}
// REMARK: instrprof-vtable-value-prof.cpp:226:19: Promote indirect call to _ZN8Derived14funcEii with count 50 out of 50, sink 1 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1}
// REMARK: main.cpp:10:19: Promote indirect call to _ZN12_GLOBAL__N_18Derived24funcEii.llvm.{{.*}} with count 150 out of 200, sink 1 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E.llvm.{{.*}}}
// REMARK: main.cpp:10:19: Promote indirect call to _ZN8Derived14funcEii with count 50 out of 50, sink 1 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1}
//
// For the indirect call site `delete ptr`
// REMARK: instrprof-vtable-value-prof.cpp:228:5: Promote indirect call to _ZN12_GLOBAL__N_18Derived2D0Ev with count 750 out of 1000, sink 2 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E}
// REMARK: instrprof-vtable-value-prof.cpp:228:5: Promote indirect call to _ZN8Derived1D0Ev with count 250 out of 250, sink 2 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1}
// REMARK: main.cpp:12:5: Promote indirect call to _ZN12_GLOBAL__N_18Derived2D0Ev.llvm.{{.*}} with count 750 out of 1000, sink 2 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E.llvm.{{.*}}}
// REMARK: main.cpp:12:5: Promote indirect call to _ZN8Derived1D0Ev with count 250 out of 250, sink 2 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1}

// The IR matchers for indirect callsite `ptr->func`.
// IR-LABEL: @main
// IR: [[OBJ:%.*]] = {{.*}}call {{.*}} @_Z10createTypei
// IR: [[VTABLE:%.*]] = load ptr, ptr [[OBJ]]
// IR: [[CMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTVN12_GLOBAL__N_18Derived2E, i32 16)
// IR: [[CMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTVN12_GLOBAL__N_18Derived2E.llvm.{{.*}}, i32 16)
// IR: br i1 [[CMP1]], label %[[BB1:.*]], label %[[BB2:[a-zA-Z0-9_.]+]],
//
// IR: [[BB1]]:
// IR: [[RESBB1:%.*]] = {{.*}}call {{.*}} @_ZN12_GLOBAL__N_18Derived24funcEii
// IR: [[RESBB1:%.*]] = {{.*}}call {{.*}} @_ZN12_GLOBAL__N_18Derived24funcEii.llvm.{{.*}}
// IR: br label %[[MERGE0:[a-zA-Z0-9_.]+]]
//
// IR: [[BB2]]:
Expand All @@ -185,6 +186,7 @@
// IR: [[MERGE0]]:
// IR: [[RES2:%.*]] = phi i32 [ [[RES1]], %[[MERGE1]] ], [ [[RESBB1]], %[[BB1]] ]

//--- lib.h
#include <stdio.h>
#include <stdlib.h>
class Base {
Expand All @@ -193,12 +195,19 @@ class Base {

virtual ~Base() {};
};

class Derived1 : public Base {
public:
int func(int a, int b) override { return a * b; }
int func(int a, int b) override;

~Derived1() {}
};

__attribute__((noinline)) Base *createType(int a);

//--- lib.cpp
#include "lib.h"

namespace {
class Derived2 : public Base {
public:
Expand All @@ -207,7 +216,10 @@ class Derived2 : public Base {
~Derived2() {}
};
} // namespace
__attribute__((noinline)) Base *createType(int a) {

int Derived1::func(int a, int b) { return a * b; }

Base *createType(int a) {
Base *base = nullptr;
if (a % 4 == 0)
base = new Derived1();
Expand All @@ -216,6 +228,9 @@ __attribute__((noinline)) Base *createType(int a) {
return base;
}

//--- main.cpp
#include "lib.h"

int main(int argc, char **argv) {
int sum = 0;
for (int i = 0; i < 1000; i++) {
Expand Down
11 changes: 1 addition & 10 deletions libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -115,16 +115,7 @@ function(_get_common_compile_options output_var flags)
list(APPEND compile_options "-ffixed-point")
endif()

# Builtin recognition causes issues when trying to implement the builtin
# functions themselves. The GPU backends do not use libcalls so we disable
# the known problematic ones. This allows inlining during LTO linking.
if(LIBC_TARGET_OS_IS_GPU)
set(libc_builtins bcmp strlen memmem bzero memcmp memcpy memmem memmove
memset strcmp strstr)
foreach(builtin ${libc_builtins})
list(APPEND compile_options "-fno-builtin-${builtin}")
endforeach()
else()
if(NOT LIBC_TARGET_OS_IS_GPU)
list(APPEND compile_options "-fno-builtin")
endif()

Expand Down
11 changes: 11 additions & 0 deletions libc/cmake/modules/LLVMLibCObjectRules.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,17 @@ function(create_entrypoint_object fq_target_name)
add_dependencies(${fq_target_name} ${full_deps_list})
target_link_libraries(${fq_target_name} ${full_deps_list})

# Builtin recognition causes issues when trying to implement the builtin
# functions themselves. The GPU backends do not use libcalls so we disable the
# known problematic ones on the entrypoints that implement them.
if(LIBC_TARGET_OS_IS_GPU)
set(libc_builtins bcmp strlen memmem bzero memcmp memcpy memmem memmove
memset strcmp strstr)
if(${ADD_ENTRYPOINT_OBJ_NAME} IN_LIST libc_builtins)
target_compile_options(${fq_target_name} PRIVATE -fno-builtin-${ADD_ENTRYPOINT_OBJ_NAME})
endif()
endif()

set_target_properties(
${fq_target_name}
PROPERTIES
Expand Down
71 changes: 45 additions & 26 deletions llvm/lib/Transforms/IPO/FunctionImport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,17 @@ static cl::opt<std::string> WorkloadDefinitions(
"}"),
cl::Hidden);

static cl::opt<bool> ImportAssumeUniqueLocal(
"import-assume-unique-local", cl::init(false),
cl::desc(
"By default, a local-linkage global variable won't be imported in the "
"edge mod1:func -> mod2:local-var (from value profiles) since compiler "
"cannot assume mod2 is compiled with full path which gives local-var a "
"program-wide unique GUID. Set this option to true will help cross "
"module import of such variables. This is only safe if the compiler "
"user specify the full module path."),
cl::Hidden);

namespace llvm {
extern cl::opt<bool> EnableMemProfContextDisambiguation;
}
Expand All @@ -196,6 +207,23 @@ static std::unique_ptr<Module> loadFile(const std::string &FileName,
return Result;
}

static bool shouldSkipLocalInAnotherModule(const GlobalVarSummary *RefSummary,
size_t NumDefs,
StringRef ImporterModule) {
// We can import a local from another module if all inputs are compiled
// with full paths or when there is one definition.
if (ImportAssumeUniqueLocal || NumDefs == 1)
return false;
// In other cases, make sure we import the copy in the caller's module if the
// referenced value has local linkage. The only time a local variable can
// share an entry in the index is if there is a local with the same name in
// another module that had the same source file name (in a different
// directory), where each was compiled in their own directory so there was not
// distinguishing path.
return GlobalValue::isLocalLinkage(RefSummary->linkage()) &&
RefSummary->modulePath() != ImporterModule;
}

/// Given a list of possible callee implementation for a call site, qualify the
/// legality of importing each. The return is a range of pairs. Each pair
/// corresponds to a candidate. The first value is the ImportFailureReason for
Expand Down Expand Up @@ -228,19 +256,21 @@ static auto qualifyCalleeCandidates(
if (!Summary)
return {FunctionImporter::ImportFailureReason::GlobalVar, GVSummary};

// If this is a local function, make sure we import the copy
// in the caller's module. The only time a local function can
// share an entry in the index is if there is a local with the same name
// in another module that had the same source file name (in a different
// directory), where each was compiled in their own directory so there
// was not distinguishing path.
// However, do the import from another module if there is only one
// entry in the list - in that case this must be a reference due
// to indirect call profile data, since a function pointer can point to
// a local in another module.
if (GlobalValue::isLocalLinkage(Summary->linkage()) &&
CalleeSummaryList.size() > 1 &&
Summary->modulePath() != CallerModulePath)
// If this is a local function, make sure we import the copy in the
// caller's module. The only time a local function can share an entry in
// the index is if there is a local with the same name in another module
// that had the same source file name (in a different directory), where
// each was compiled in their own directory so there was not
// distinguishing path.
// If the local function is from another module, it must be a reference
// due to indirect call profile data since a function pointer can point
// to a local in another module. Do the import from another module if
// there is only one entry in the list or when all files in the program
// are compiled with full path - in both cases the local function has
// unique PGO name and GUID.
if (shouldSkipLocalInAnotherModule(dyn_cast<GlobalVarSummary>(Summary),
CalleeSummaryList.size(),
CallerModulePath))
return {
FunctionImporter::ImportFailureReason::LocalLinkageNotInModule,
GVSummary};
Expand Down Expand Up @@ -359,18 +389,6 @@ class GlobalsImporter final {

LLVM_DEBUG(dbgs() << " ref -> " << VI << "\n");

// If this is a local variable, make sure we import the copy
// in the caller's module. The only time a local variable can
// share an entry in the index is if there is a local with the same name
// in another module that had the same source file name (in a different
// directory), where each was compiled in their own directory so there
// was not distinguishing path.
auto LocalNotInModule =
[&](const GlobalValueSummary *RefSummary) -> bool {
return GlobalValue::isLocalLinkage(RefSummary->linkage()) &&
RefSummary->modulePath() != Summary.modulePath();
};

for (const auto &RefSummary : VI.getSummaryList()) {
const auto *GVS = dyn_cast<GlobalVarSummary>(RefSummary.get());
// Functions could be referenced by global vars - e.g. a vtable; but we
Expand All @@ -379,7 +397,8 @@ class GlobalsImporter final {
// based on profile information). Should we decide to handle them here,
// we can refactor accordingly at that time.
if (!GVS || !Index.canImportGlobalVar(GVS, /* AnalyzeRefs */ true) ||
LocalNotInModule(GVS))
shouldSkipLocalInAnotherModule(GVS, VI.getSummaryList().size(),
Summary.modulePath()))
continue;

// If there isn't an entry for GUID, insert <GUID, Definition> pair.
Expand Down