diff --git a/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp b/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp new file mode 100644 index 0000000000000..5c8426b40892f --- /dev/null +++ b/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp @@ -0,0 +1,142 @@ +// REQUIRES: lld-available + +// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -g -fprofile-generate=. -mllvm -enable-vtable-value-profiling %s -o %t-test +// RUN: env LLVM_PROFILE_FILE=%t-test.profraw %t-test + +// Show vtable profiles from raw profile. +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-test.profraw | FileCheck %s --check-prefixes=COMMON,RAW + +// Generate indexed profile from raw profile and show the data. +// RUN: llvm-profdata merge %t-test.profraw -o %t-test.profdata +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-test.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED + +// Generate text profile from raw and indexed profiles respectively and show the data. 
+// RUN: llvm-profdata merge --text %t-test.profraw -o %t-raw.proftext +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text %t-raw.proftext | FileCheck %s --check-prefix=ICTEXT +// RUN: llvm-profdata merge --text %t-test.profdata -o %t-indexed.proftext +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text %t-indexed.proftext | FileCheck %s --check-prefix=ICTEXT + +// Generate indexed profile from text profiles and show the data +// RUN: llvm-profdata merge --binary %t-raw.proftext -o %t-text.profraw +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-text.profraw | FileCheck %s --check-prefixes=COMMON,INDEXED +// RUN: llvm-profdata merge --binary %t-indexed.proftext -o %t-text.profdata +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-text.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED + +// COMMON: Counters: +// COMMON-NEXT: main: +// COMMON-NEXT: Hash: 0x0f9a16fe6d398548 +// COMMON-NEXT: Counters: 2 +// COMMON-NEXT: Indirect Call Site Count: 2 +// COMMON-NEXT: Number of instrumented vtables: 2 +// RAW: Indirect Target Results: +// RAW-NEXT: [ 0, _ZN8Derived15func1Eii, 250 ] (25.00%) +// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func1Eii, 750 ] (75.00%) +// RAW-NEXT: [ 1, _ZN8Derived15func2Eii, 250 ] (25.00%) +// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func2Eii, 750 ] (75.00%) +// RAW-NEXT: VTable Results: +// RAW-NEXT: [ 0, _ZTV8Derived1, 250 ] (25.00%) +// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%) +// RAW-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%) +// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%) +// INDEXED: Indirect Target Results: +// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func1Eii, 750 ] (75.00%) +// INDEXED-NEXT: [ 0, 
_ZN8Derived15func1Eii, 250 ] (25.00%) +// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func2Eii, 750 ] (75.00%) +// INDEXED-NEXT: [ 1, _ZN8Derived15func2Eii, 250 ] (25.00%) +// INDEXED-NEXT: VTable Results: +// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%) +// INDEXED-NEXT: [ 0, _ZTV8Derived1, 250 ] (25.00%) +// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%) +// INDEXED-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%) +// COMMON: Instrumentation level: IR entry_first = 0 +// COMMON-NEXT: Functions shown: 1 +// COMMON-NEXT: Total functions: 6 +// COMMON-NEXT: Maximum function count: 1000 +// COMMON-NEXT: Maximum internal block count: 250 +// COMMON-NEXT: Statistics for indirect call sites profile: +// COMMON-NEXT: Total number of sites: 2 +// COMMON-NEXT: Total number of sites with values: 2 +// COMMON-NEXT: Total number of profiled values: 4 +// COMMON-NEXT: Value sites histogram: +// COMMON-NEXT: NumTargets, SiteCount +// COMMON-NEXT: 2, 2 +// COMMON-NEXT: Statistics for vtable profile: +// COMMON-NEXT: Total number of sites: 2 +// COMMON-NEXT: Total number of sites with values: 2 +// COMMON-NEXT: Total number of profiled values: 4 +// COMMON-NEXT: Value sites histogram: +// COMMON-NEXT: NumTargets, SiteCount +// COMMON-NEXT: 2, 2 + +// ICTEXT: :ir +// ICTEXT: main +// ICTEXT: # Func Hash: +// ICTEXT: 1124236338992350536 +// ICTEXT: # Num Counters: +// ICTEXT: 2 +// ICTEXT: # Counter Values: +// ICTEXT: 1000 +// ICTEXT: 1 +// ICTEXT: # Num Value Kinds: +// ICTEXT: 2 +// ICTEXT: # ValueKind = IPVK_IndirectCallTarget: +// ICTEXT: 0 +// ICTEXT: # NumValueSites: +// ICTEXT: 2 +// ICTEXT: 2 +// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func1Eii:750 +// ICTEXT: _ZN8Derived15func1Eii:250 +// ICTEXT: 2 +// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func2Eii:750 +// 
ICTEXT: _ZN8Derived15func2Eii:250 +// ICTEXT: # ValueKind = IPVK_VTableTarget: +// ICTEXT: 2 +// ICTEXT: # NumValueSites: +// ICTEXT: 2 +// ICTEXT: 2 +// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750 +// ICTEXT: _ZTV8Derived1:250 +// ICTEXT: 2 +// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750 +// ICTEXT: _ZTV8Derived1:250 + +#include +#include +class Base { +public: + virtual int func1(int a, int b) = 0; + virtual int func2(int a, int b) = 0; +}; +class Derived1 : public Base { +public: + int func1(int a, int b) override { return a + b; } + + int func2(int a, int b) override { return a * b; } +}; +namespace { +class Derived2 : public Base { +public: + int func1(int a, int b) override { return a - b; } + + int func2(int a, int b) override { return a * (a - b); } +}; +} // namespace +__attribute__((noinline)) Base *createType(int a) { + Base *base = nullptr; + if (a % 4 == 0) + base = new Derived1(); + else + base = new Derived2(); + return base; +} +int main(int argc, char **argv) { + int sum = 0; + for (int i = 0; i < 1000; i++) { + int a = rand(); + int b = rand(); + Base *ptr = createType(i); + sum += ptr->func1(a, b) + ptr->func2(b, a); + } + printf("sum is %d\n", sum); + return 0; +} diff --git a/llvm/include/llvm/Analysis/IndirectCallVisitor.h b/llvm/include/llvm/Analysis/IndirectCallVisitor.h index 0825e19ecd2d2..50815f4e3e839 100644 --- a/llvm/include/llvm/Analysis/IndirectCallVisitor.h +++ b/llvm/include/llvm/Analysis/IndirectCallVisitor.h @@ -16,23 +16,75 @@ #include namespace llvm { -// Visitor class that finds all indirect call. +// Visitor class that finds indirect calls or instructions that gives vtable +// value, depending on Type. 
struct PGOIndirectCallVisitor : public InstVisitor { + enum class InstructionType { + kIndirectCall = 0, + kVTableVal = 1, + }; std::vector IndirectCalls; - PGOIndirectCallVisitor() = default; + std::vector ProfiledAddresses; + PGOIndirectCallVisitor(InstructionType Type) : Type(Type) {} void visitCallBase(CallBase &Call) { - if (Call.isIndirectCall()) + if (!Call.isIndirectCall()) + return; + + if (Type == InstructionType::kIndirectCall) { IndirectCalls.push_back(&Call); + return; + } + + assert(Type == InstructionType::kVTableVal && "Control flow guaranteed"); + + LoadInst *LI = dyn_cast(Call.getCalledOperand()); + // The code pattern to look for + // + // %vtable = load ptr, ptr %b + // %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1 + // %2 = load ptr, ptr %vfn + // %call = tail call i32 %2(ptr %b) + // + // %vtable is the vtable address value to profile, and + // %2 is the indirect call target address to profile. + if (LI != nullptr) { + Value *Ptr = LI->getPointerOperand(); + Value *VTablePtr = Ptr->stripInBoundsConstantOffsets(); + // This is a heuristic to find address feeding instructions. + // FIXME: Add support in the frontend so LLVM type intrinsics are + // emitted without LTO. This way, added intrinsics could filter + // non-vtable instructions and reduce instrumentation overhead. + // Since a non-vtable profiled address is not within the address + // range of vtable objects, it's stored as zero in indexed profiles. + // A pass that looks up a symbol with a zero hash will (almost) always + // find nullptr and skip the actual transformation (e.g., comparison + // of symbols). So the performance overhead from non-vtable profiled + // addresses is negligible, if it exists at all. Comparing the loaded + // address with the symbol address guarantees correctness. + if (VTablePtr != nullptr && isa(VTablePtr)) + ProfiledAddresses.push_back(cast(VTablePtr)); + } } + +private: + InstructionType Type; }; -// Helper function that finds all indirect call sites.
inline std::vector findIndirectCalls(Function &F) { - PGOIndirectCallVisitor ICV; + PGOIndirectCallVisitor ICV( + PGOIndirectCallVisitor::InstructionType::kIndirectCall); ICV.visit(F); return ICV.IndirectCalls; } + +inline std::vector findVTableAddrs(Function &F) { + PGOIndirectCallVisitor ICV( + PGOIndirectCallVisitor::InstructionType::kVTableVal); + ICV.visit(F); + return ICV.ProfiledAddresses; +} + } // namespace llvm #endif diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 612c444faec64..5397380992ed4 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -17,6 +17,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitmaskEnum.h" +#include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" @@ -89,6 +90,9 @@ inline StringRef getInstrProfValueProfMemOpFuncName() { /// Return the name prefix of variables containing instrumented function names. inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; } +/// Return the name prefix of variables containing virtual table profile data. +inline StringRef getInstrProfVTableVarPrefix() { return "__profvt_"; } + /// Return the name prefix of variables containing per-function control data. inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; } @@ -106,9 +110,9 @@ inline StringRef getInstrProfVNodesVarName() { return "__llvm_prf_vnodes"; } /// Return the name of the variable holding the strings (possibly compressed) /// of all function's PGO names. -inline StringRef getInstrProfNamesVarName() { - return "__llvm_prf_nm"; -} +inline StringRef getInstrProfNamesVarName() { return "__llvm_prf_nm"; } + +inline StringRef getInstrProfVTableNamesVarName() { return "__llvm_prf_vnm"; } /// Return the name of a covarage mapping variable (internal linkage) /// for each instrumented source module. 
Such variables are allocated @@ -140,7 +144,8 @@ inline StringRef getInstrProfRegFuncName() { return "__llvm_profile_register_function"; } -/// Return the name of the runtime interface that registers the PGO name strings. +/// Return the name of the runtime interface that registers the PGO name +/// strings. inline StringRef getInstrProfNamesRegFuncName() { return "__llvm_profile_register_names_function"; } @@ -246,6 +251,9 @@ Error collectGlobalObjectNameStrings(ArrayRef NameStrs, Error collectPGOFuncNameStrings(ArrayRef NameVars, std::string &Result, bool doCompression = true); +Error collectVTableStrings(ArrayRef VTables, + std::string &Result, bool doCompression); + /// Check if INSTR_PROF_RAW_VERSION_VAR is defined. This global is only being /// set in IR PGO compilation. bool isIRPGOFlagSet(const Module *M); @@ -288,6 +296,8 @@ inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; } /// Return the PGOFuncName meta data associated with a function. MDNode *getPGOFuncNameMetadata(const Function &F); +std::string getPGOName(const GlobalVariable &V, bool InLTO = false); + /// Create the PGOFuncName meta data if PGOFuncName is different from /// function's raw name. This should only apply to internal linkage functions /// declared by users only. @@ -295,7 +305,7 @@ void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName); /// Check if we can use Comdat for profile variables. This will eliminate /// the duplicated profile variables for Comdat functions. -bool needsComdatForCounter(const Function &F, const Module &M); +bool needsComdatForCounter(const GlobalObject &GV, const Module &M); /// An enum describing the attributes of an instrumented profile. enum class InstrProfKind { @@ -431,23 +441,34 @@ class InstrProfSymtab { using AddrHashMap = std::vector>; private: + using AddrIntervalMap = + IntervalMap>; StringRef Data; uint64_t Address = 0; - // Unique name strings. + // Unique name strings. 
Used to ensure entries in MD5NameMap (a vector that's + // going to be sorted) have unique MD5 keys in the first place. StringSet<> NameTab; + // Records the unique virtual table names. This is used by InstrProfWriter to + // write out an on-disk chained hash table of virtual table names. + // InstrProfWriter stores per function profile data (keyed by function names) + // so it doesn't use a StringSet for function names. + StringSet<> VTableNames; // A map from MD5 keys to function name strings. std::vector> MD5NameMap; + // A map from MD5 keys to function define. We only populate this map // when build the Symtab from a Module. std::vector> MD5FuncMap; // A map from function runtime address to function name MD5 hash. // This map is only populated and used by raw instr profile reader. AddrHashMap AddrToMD5Map; + + AddrIntervalMap::Allocator VTableAddrMapAllocator; + // This map is only populated and used by raw instr profile reader. + AddrIntervalMap VTableAddrMap; bool Sorted = false; - static StringRef getExternalSymbol() { - return "** External Symbol **"; - } + static StringRef getExternalSymbol() { return "** External Symbol **"; } // Returns the canonial name of the given PGOName. In a canonical name, all // suffixes that begins with "." except ".__uniq." are stripped. @@ -469,7 +490,7 @@ class InstrProfSymtab { inline void finalizeSymtab(); public: - InstrProfSymtab() = default; + InstrProfSymtab() : VTableAddrMap(VTableAddrMapAllocator) {} // Not copyable or movable. // Consider std::unique_ptr for move. @@ -488,9 +509,19 @@ class InstrProfSymtab { /// \c NameStrings is a string composed of one of more sub-strings /// encoded in the format described in \c collectPGOFuncNameStrings. - /// This method is a wrapper to \c readPGOFuncNameStrings method. + /// This method is a wrapper to \c readAndDecodeStrings method. Error create(StringRef NameStrings); + /// Initialize symtab states with function names and vtable names.
\c + /// FuncNameStrings is a string composed of one or more encoded function name + /// strings, and \c VTableNameStrings is composed of one or more encoded vtable + /// names. This interface is solely used by raw profile reader. + Error create(StringRef FuncNameStrings, StringRef VTableNameStrings); + + /// Initialize 'this' with the set of vtable names encoded in + /// \c CompressedVTableNames. + Error initVTableNamesFromCompressedStrings(StringRef CompressedVTableNames); + /// This interface is used by reader of CoverageMapping test /// format. inline Error create(StringRef D, uint64_t BaseAddr); @@ -503,32 +534,69 @@ class InstrProfSymtab { /// Create InstrProfSymtab from a set of names iteratable from /// \p IterRange. This interface is used by IndexedProfReader. - template Error create(const NameIterRange &IterRange); - - /// Update the symtab by adding \p FuncName to the table. This interface - /// is used by the raw and text profile readers. - Error addFuncName(StringRef FuncName) { - if (FuncName.empty()) + template + Error create(const NameIterRange &IterRange); + + /// Create InstrProfSymtab from a set of function names and vtable + /// names iterable from \p FuncIterRange and \p VTableIterRange. This + /// interface is used by IndexedProfReader. + template + Error create(const FuncNameIterRange &FuncIterRange, + const VTableNameIterRange &VTableIterRange); + + Error addSymbolName(StringRef SymbolName) { + if (SymbolName.empty()) return make_error(instrprof_error::malformed, - "function name is empty"); - auto Ins = NameTab.insert(FuncName); + "symbol name is empty"); + + // Insert into NameTab so that MD5NameMap (a vector that will be sorted) + // won't have duplicated entries in the first place.
+ auto Ins = NameTab.insert(SymbolName); if (Ins.second) { MD5NameMap.push_back(std::make_pair( - IndexedInstrProf::ComputeHash(FuncName), Ins.first->getKey())); + IndexedInstrProf::ComputeHash(SymbolName), Ins.first->getKey())); Sorted = false; } return Error::success(); } + /// The method name is kept since there are many callers. + /// It just forwards to 'addSymbolName'. + Error addFuncName(StringRef FuncName) { return addSymbolName(FuncName); } + + /// Adds VTableName as a known symbol, and inserts it into a set that + /// tracks all vtable names. + Error addVTableName(StringRef VTableName) { + if (Error E = addSymbolName(VTableName)) + return E; + + // Record VTableName. InstrProfWriter uses this set. The comment around + // class member explains why. + VTableNames.insert(VTableName); + return Error::success(); + } + + const StringSet<> &getVTableNames() const { return VTableNames; } + /// Map a function address to its name's MD5 hash. This interface /// is only used by the raw profiler reader. void mapAddress(uint64_t Addr, uint64_t MD5Val) { AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val)); } + /// Map the address range (i.e., [start_address, end_address)) of a variable + /// to its name's MD5 hash. This interface is only used by the raw profile + /// reader. + void mapVTableAddress(uint64_t StartAddr, uint64_t EndAddr, uint64_t MD5Val) { + VTableAddrMap.insert(StartAddr, EndAddr, MD5Val); + } + /// Return a function's hash, or 0, if the function isn't in this SymTab. uint64_t getFunctionHashFromAddress(uint64_t Address); + /// Return a vtable's hash, or 0 if the vtable doesn't exist in this SymTab. + uint64_t getVTableHashFromAddress(uint64_t Address); + /// Return function's PGO name from the function name's symbol /// address in the object file. If an error occurs, return /// an empty string.
@@ -574,6 +642,24 @@ Error InstrProfSymtab::create(const NameIterRange &IterRange) { return Error::success(); } +template +Error InstrProfSymtab::create(const FuncNameIterRange &FuncIterRange, + const VTableNameIterRange &VTableIterRange) { + // Iterate elements by StringRef rather than by const reference. + // StringRef is small enough, so the loop is efficient whether + // element in the range is std::string or StringRef. + for (StringRef Name : FuncIterRange) + if (Error E = addFuncName(Name)) + return E; + + for (StringRef VTableName : VTableIterRange) + if (Error E = addVTableName(VTableName)) + return E; + + finalizeSymtab(); + return Error::success(); +} + void InstrProfSymtab::finalizeSymtab() { if (Sorted) return; @@ -877,6 +963,8 @@ struct InstrProfRecord { return ValueData->IndirectCallSites; case IPVK_MemOPSize: return ValueData->MemOPSizes; + case IPVK_VTableTarget: + return ValueData->VTableTargets; default: llvm_unreachable("Unknown value kind!"); } diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index cfde5d3fc77d6..e46570af3873f 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -626,6 +626,12 @@ class InstrProfReaderIndex : public InstrProfReaderIndexBase { InstrProfKind getProfileKind() const override; Error populateSymtab(InstrProfSymtab &Symtab) override { + // FIXME: the create method calls 'finalizeSymtab' and sorts a bunch of + // arrays/maps. Since there are other data sources other than 'HashTable' to + // populate a symtab, it might make sense to have something like this + // 1. Let each data source populate Symtab and init the arrays/maps without + // calling 'finalizeSymtab' + // 2. Call 'finalizeSymtab' once to get all arrays/maps sorted if needed. 
return Symtab.create(HashTable->keys()); } }; diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h index f70574d1f7563..07ae610a69058 100644 --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -63,6 +63,9 @@ class InstrProfWriter { // List of binary ids. std::vector BinaryIds; + // Read the vtable names from raw instr profile reader. + StringSet<> VTableNames; + // An enum describing the attributes of the profile. InstrProfKind ProfileKind = InstrProfKind::Unknown; // Use raw pointer here for the incomplete type object. @@ -92,6 +95,7 @@ class InstrProfWriter { void addRecord(NamedInstrProfRecord &&I, function_ref Warn) { addRecord(std::move(I), 1, Warn); } + void addVTableName(StringRef VTableName) { VTableNames.insert(VTableName); } /// Add \p SrcTraces using reservoir sampling where \p SrcStreamSize is the /// total number of temporal profiling traces the source has seen. 
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 8cf97fcb1dab5..90c3cfc45b98a 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -34,6 +34,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Compression.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -57,6 +58,8 @@ using namespace llvm; +#define DEBUG_TYPE "instrprof" + static cl::opt StaticFuncFullModulePrefix( "static-func-full-module-prefix", cl::init(true), cl::Hidden, cl::desc("Use full module build paths in the profile counter names for " @@ -219,6 +222,12 @@ cl::opt DoInstrProfNameCompression( "enable-name-compression", cl::desc("Enable name/filename string compression"), cl::init(true)); +cl::opt EnableVTableValueProfiling( + "enable-vtable-value-profiling", cl::init(false), + cl::desc("If true, the virtual table address will be instrumented to know " + "the types of a C++ pointer. The information is used in indirect " + "call promotion to do selective vtable-based comparison.")); + std::string getInstrProfSectionName(InstrProfSectKind IPSK, Triple::ObjectFormatType OF, bool AddSegmentInfo) { @@ -378,6 +387,13 @@ std::string getPGOFuncName(const Function &F, bool InLTO, uint64_t Version) { return getPGOFuncName(F.getName(), GlobalValue::ExternalLinkage, ""); } +std::string getPGOName(const GlobalVariable &V, bool InLTO) { + // PGONameMetadata should be set by compiler at profile use time + // and read by symtab creation to look up symbols corresponding to + // a MD5 hash. + return getIRPGOObjectName(V, InLTO, /*PGONameMetadata=*/nullptr); +} + // See getIRPGOObjectName() for a discription of the format. 
std::pair getParsedIRPGOName(StringRef IRPGOName) { auto [FileName, MangledName] = IRPGOName.split(kGlobalIdentifierDelimiter); @@ -459,6 +475,7 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) { if (Error E = addFuncWithName(F, getPGOFuncName(F, InLTO))) return E; } + Sorted = false; finalizeSymtab(); return Error::success(); @@ -517,6 +534,25 @@ Error InstrProfSymtab::create(StringRef NameStrings) { std::bind(&InstrProfSymtab::addFuncName, this, std::placeholders::_1)); } +Error InstrProfSymtab::create(StringRef FuncNameStrings, + StringRef VTableNameStrings) { + if (Error E = readAndDecodeStrings(FuncNameStrings, + std::bind(&InstrProfSymtab::addFuncName, + this, std::placeholders::_1))) + return E; + + return readAndDecodeStrings( + VTableNameStrings, + std::bind(&InstrProfSymtab::addVTableName, this, std::placeholders::_1)); +} + +Error InstrProfSymtab::initVTableNamesFromCompressedStrings( + StringRef CompressedVTableStrings) { + return readAndDecodeStrings( + CompressedVTableStrings, + std::bind(&InstrProfSymtab::addVTableName, this, std::placeholders::_1)); +} + StringRef InstrProfSymtab::getCanonicalName(StringRef PGOName) { // In ThinLTO, local function may have been promoted to global and have // suffix ".llvm." added to the function name. We need to add the @@ -560,6 +596,12 @@ Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) { return Error::success(); } +uint64_t InstrProfSymtab::getVTableHashFromAddress(uint64_t Address) { + // Given a runtime address, look up the hash value in the interval map, and + // fallback to value 0 if a hash value is not found. 
+ return VTableAddrMap.lookup(Address, 0); +} + uint64_t InstrProfSymtab::getFunctionHashFromAddress(uint64_t Address) { finalizeSymtab(); auto It = partition_point(AddrToMD5Map, [=](std::pair A) { @@ -636,6 +678,16 @@ Error collectPGOFuncNameStrings(ArrayRef NameVars, NameStrs, compression::zlib::isAvailable() && doCompression, Result); } +Error collectVTableStrings(ArrayRef VTables, + std::string &Result, bool doCompression) { + std::vector VTableNameStrs; + for (auto *VTable : VTables) + VTableNameStrs.push_back(getPGOName(*VTable)); + return collectGlobalObjectNameStrings( + VTableNameStrs, compression::zlib::isAvailable() && doCompression, + Result); +} + void InstrProfRecord::accumulateCounts(CountSumOrPercent &Sum) const { uint64_t FuncSum = 0; Sum.NumEntries += Counts.size(); @@ -898,6 +950,9 @@ uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind, if (ValueKind == IPVK_IndirectCallTarget) return SymTab->getFunctionHashFromAddress(Value); + if (ValueKind == IPVK_VTableTarget) + return SymTab->getVTableHashFromAddress(Value); + return Value; } @@ -1288,8 +1343,8 @@ void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName) { F.setMetadata(getPGOFuncNameMetadataName(), N); } -bool needsComdatForCounter(const Function &F, const Module &M) { - if (F.hasComdat()) +bool needsComdatForCounter(const GlobalObject &GO, const Module &M) { + if (GO.hasComdat()) return true; if (!Triple(M.getTargetTriple()).supportsCOMDAT()) @@ -1305,7 +1360,7 @@ bool needsComdatForCounter(const Function &F, const Module &M) { // available_externally functions will end up being duplicated in raw profile // data. This can result in distorted profile as the counts of those dups // will be accumulated by the profile merger. 
- GlobalValue::LinkageTypes Linkage = F.getLinkage(); + GlobalValue::LinkageTypes Linkage = GO.getLinkage(); if (Linkage != GlobalValue::ExternalWeakLinkage && Linkage != GlobalValue::AvailableExternallyLinkage) return false; @@ -1461,7 +1516,7 @@ void OverlapStats::dump(raw_fd_ostream &OS) const { for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++) { if (Base.ValueCounts[I] < 1.0f && Test.ValueCounts[I] < 1.0f) continue; - char ProfileKindName[20]; + char ProfileKindName[20] = {0}; switch (I) { case IPVK_IndirectCallTarget: strncpy(ProfileKindName, "IndirectCall", 19); @@ -1469,6 +1524,9 @@ void OverlapStats::dump(raw_fd_ostream &OS) const { case IPVK_MemOPSize: strncpy(ProfileKindName, "MemOP", 19); break; + case IPVK_VTableTarget: + strncpy(ProfileKindName, "VTable", 19); + break; default: snprintf(ProfileKindName, 19, "VP[%d]", I); break; diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 4f786a23f3545..e5fd4ea227ba4 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -369,8 +369,11 @@ TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { } else if (ValueKind == IPVK_VTableTarget) { if (InstrProfSymtab::isExternalSymbol(VD.first)) Value = 0; - else + else { + if (Error E = Symtab->addVTableName(VD.first)) + return E; Value = IndexedInstrProf::ComputeHash(VD.first); + } } else { READ_NUM(VD.first, Value); } @@ -538,7 +541,8 @@ Error RawInstrProfReader::readNextHeader(const char *CurrentPos) { template Error RawInstrProfReader::createSymtab(InstrProfSymtab &Symtab) { - if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart))) + if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart), + StringRef(VNamesStart, VNamesEnd - VNamesStart))) return error(std::move(E)); for (const RawInstrProf::ProfileData *I = Data; I != DataEnd; ++I) { const IntPtrT FPtr = swap(I->FunctionPointer); @@ -546,6 +550,21 @@ Error 
RawInstrProfReader::createSymtab(InstrProfSymtab &Symtab) { continue; Symtab.mapAddress(FPtr, swap(I->NameRef)); } + + if (VTableBegin != nullptr && VTableEnd != nullptr) { + for (const RawInstrProf::VTableProfileData *I = VTableBegin; + I != VTableEnd; ++I) { + const IntPtrT VPtr = swap(I->VTablePointer); + if (!VPtr) + continue; + // Map both begin and end address to the name hash, since the instrumented + // address could be somewhere in the middle. + // VPtr is of type uint32_t or uint64_t so 'VPtr + I->VTableSize' marks + // the end of vtable address. + Symtab.mapVTableAddress(VPtr, VPtr + swap(I->VTableSize), + swap(I->VTableNameHash)); + } + } return success(); } @@ -1367,7 +1386,15 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() { if (Symtab) return *Symtab; - std::unique_ptr NewSymtab = std::make_unique(); + auto NewSymtab = std::make_unique(); + + if (Error E = NewSymtab->initVTableNamesFromCompressedStrings( + StringRef(VTableNamePtr, CompressedVTableNamesLen))) { + auto [ErrCode, Msg] = InstrProfError::take(std::move(E)); + consumeError(error(ErrCode, Msg)); + } + + // finalizeSymtab is called inside populateSymtab. 
if (Error E = Index->populateSymtab(*NewSymtab)) { auto [ErrCode, Msg] = InstrProfError::take(std::move(E)); consumeError(error(ErrCode, Msg)); diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index d9fe88a00bdfc..b0c4669ab2272 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -19,6 +19,7 @@ #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/ProfileCommon.h" +#include "llvm/Support/Compression.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/Error.h" @@ -618,13 +619,18 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { uint64_t VTableNamesSectionStart = OS.tell(); if (!WritePrevVersion) { - // Use a dummy (and uncompressed) string as compressed vtable names and get - // the necessary profile format change in place for version 12. - // TODO: Store the list of vtable names in InstrProfWriter and use the - // real compressed name. - std::string CompressedVTableNames = "VTableNames"; + std::vector VTableNameStrs; + for (StringRef VTableName : VTableNames.keys()) + VTableNameStrs.push_back(VTableName.str()); + + std::string CompressedVTableNames; + if (!VTableNameStrs.empty()) + if (Error E = collectGlobalObjectNameStrings( + VTableNameStrs, compression::zlib::isAvailable(), + CompressedVTableNames)) + return E; - uint64_t CompressedStringLen = CompressedVTableNames.length(); + const uint64_t CompressedStringLen = CompressedVTableNames.length(); // Record the length of compressed string. OS.write(CompressedStringLen); @@ -634,12 +640,11 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { OS.writeByte(static_cast(c)); // Pad up to a multiple of 8. - // InstrProfReader would read bytes according to 'CompressedStringLen'. - uint64_t PaddedLength = alignTo(CompressedStringLen, 8); + // InstrProfReader could read bytes according to 'CompressedStringLen'. 
+ const uint64_t PaddedLength = alignTo(CompressedStringLen, 8); - for (uint64_t K = CompressedStringLen; K < PaddedLength; K++) { + for (uint64_t K = CompressedStringLen; K < PaddedLength; K++) OS.writeByte(0); - } } uint64_t TemporalProfTracesSectionStart = 0; @@ -848,6 +853,10 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) { } } + for (const auto &VTableName : VTableNames) + if (Error E = Symtab.addVTableName(VTableName.getKey())) + return E; + if (static_cast(ProfileKind & InstrProfKind::TemporalProfile)) writeTextTemporalProfTraceData(OS, Symtab); diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index c42c53edd5119..f9b58d9f27821 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -64,6 +64,9 @@ using namespace llvm; #define DEBUG_TYPE "instrprof" namespace llvm { +// Command line option to enable vtable value profiling. Defined in +// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling= +extern cl::opt EnableVTableValueProfiling; // TODO: Remove -debug-info-correlate in next LLVM release, in favor of // -profile-correlate=debug-info. cl::opt DebugInfoCorrelate( @@ -219,12 +222,18 @@ class InstrLowerer final { PerFunctionProfileData() = default; }; DenseMap ProfileDataMap; + // Key is virtual table variable, value is 'VTableProfData' in the form of + // GlobalVariable. + DenseMap VTableDataMap; /// If runtime relocation is enabled, this maps functions to the load /// instruction that produces the profile relocation bias. DenseMap FunctionToProfileBiasMap; std::vector CompilerUsedVars; std::vector UsedVars; std::vector ReferencedNames; + // The list of virtual table variables of which the VTableProfData is + // collected. 
+ std::vector ReferencedVTables; GlobalVariable *NamesVar = nullptr; size_t NamesSize = 0; @@ -308,7 +317,7 @@ class InstrLowerer final { GlobalValue::LinkageTypes Linkage); /// Set Comdat property of GV, if required. - void maybeSetComdat(GlobalVariable *GV, Function *Fn, StringRef VarName); + void maybeSetComdat(GlobalVariable *GV, GlobalObject *GO, StringRef VarName); /// Setup the sections into which counters and bitmaps are allocated. GlobalVariable *setupProfileSection(InstrProfInstBase *Inc, @@ -317,9 +326,15 @@ class InstrLowerer final { /// Create INSTR_PROF_DATA variable for counters and bitmaps. void createDataVariable(InstrProfCntrInstBase *Inc); + /// Get the counters for virtual table values, creating them if necessary. + void getOrCreateVTableProfData(GlobalVariable *GV); + /// Emit the section with compressed function names. void emitNameData(); + /// Emit the section with compressed vtable names. + void emitVTableNames(); + /// Emit value nodes section for value profiling. void emitVNodes(); @@ -763,6 +778,12 @@ bool InstrLowerer::lower() { } } + if (EnableVTableValueProfiling) + for (GlobalVariable &GV : M.globals()) + // Global variables with type metadata are virtual table variables. + if (GV.hasMetadata(LLVMContext::MD_type)) + getOrCreateVTableProfData(&GV); + for (Function &F : M) MadeChange |= lowerIntrinsics(&F); @@ -776,6 +797,7 @@ bool InstrLowerer::lower() { emitVNodes(); emitNameData(); + emitVTableNames(); // Emit runtime hook for the cases where the target does not unconditionally // require pulling in profile runtime, and coverage is enabled on code that is @@ -1193,13 +1215,13 @@ static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) { return true; } -void InstrLowerer::maybeSetComdat(GlobalVariable *GV, Function *Fn, +void InstrLowerer::maybeSetComdat(GlobalVariable *GV, GlobalObject *GO, StringRef CounterGroupName) { // Place lowered global variables in a comdat group if the associated function - // is a COMDAT. 
This will make sure that only one copy of global variable - // (e.g. function counters) of the COMDAT function will be emitted after - // linking. - bool NeedComdat = needsComdatForCounter(*Fn, M); + // or global variable is a COMDAT. This will make sure that only one copy of + // global variable (e.g. function counters) of the COMDAT function will be + // emitted after linking. + bool NeedComdat = needsComdatForCounter(*GO, M); bool UseComdat = (NeedComdat || TT.isOSBinFormatELF()); if (!UseComdat) @@ -1237,6 +1259,104 @@ void InstrLowerer::maybeSetComdat(GlobalVariable *GV, Function *Fn, GV->setLinkage(GlobalValue::InternalLinkage); } +static inline bool shouldRecordVTableAddr(GlobalVariable *GV) { + if (!profDataReferencedByCode(*GV->getParent())) + return false; + + if (!GV->hasLinkOnceLinkage() && !GV->hasLocalLinkage() && + !GV->hasAvailableExternallyLinkage()) + return true; + + // This avoids the profile data from referencing internal symbols in + // COMDAT. + if (GV->hasLocalLinkage() && GV->hasComdat()) + return false; + + return true; +} + +// FIXME: Introduce an internal alias like what's done for functions to reduce +// the number of relocation entries. +static inline Constant *getVTableAddrForProfData(GlobalVariable *GV) { + auto *Int8PtrTy = PointerType::getUnqual(GV->getContext()); + + // Store a nullptr in __profvt_ if a real address shouldn't be used. + if (!shouldRecordVTableAddr(GV)) + return ConstantPointerNull::get(Int8PtrTy); + + return ConstantExpr::getBitCast(GV, Int8PtrTy); +} + +void InstrLowerer::getOrCreateVTableProfData(GlobalVariable *GV) { + assert(!DebugInfoCorrelate && + "Value profiling is not supported with lightweight instrumentation"); + if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) + return; + + // Skip llvm internal global variable or __prof variables. 
+ if (GV->getName().starts_with("llvm.") || + GV->getName().starts_with("__llvm") || + GV->getName().starts_with("__prof")) + return; + + // VTableProfData already created + auto It = VTableDataMap.find(GV); + if (It != VTableDataMap.end() && It->second) + return; + + GlobalValue::LinkageTypes Linkage = GV->getLinkage(); + GlobalValue::VisibilityTypes Visibility = GV->getVisibility(); + + // This is to keep consistent with per-function profile data + // for correctness. + if (TT.isOSBinFormatXCOFF()) { + Linkage = GlobalValue::InternalLinkage; + Visibility = GlobalValue::DefaultVisibility; + } + + LLVMContext &Ctx = M.getContext(); + Type *DataTypes[] = { +#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) LLVMType, +#include "llvm/ProfileData/InstrProfData.inc" +#undef INSTR_PROF_VTABLE_DATA + }; + + auto *DataTy = StructType::get(Ctx, ArrayRef(DataTypes)); + + // Used by INSTR_PROF_VTABLE_DATA MACRO + Constant *VTableAddr = getVTableAddrForProfData(GV); + const std::string PGOVTableName = getPGOName(*GV); + // Record the length of the vtable. This is needed since vtable pointers + // loaded from C++ objects might be from the middle of a vtable definition. + uint32_t VTableSizeVal = + M.getDataLayout().getTypeAllocSize(GV->getValueType()); + + Constant *DataVals[] = { +#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) Init, +#include "llvm/ProfileData/InstrProfData.inc" +#undef INSTR_PROF_VTABLE_DATA + }; + + auto *Data = + new GlobalVariable(M, DataTy, /*constant=*/false, Linkage, + ConstantStruct::get(DataTy, DataVals), + getInstrProfVTableVarPrefix() + PGOVTableName); + + Data->setVisibility(Visibility); + Data->setSection(getInstrProfSectionName(IPSK_vtab, TT.getObjectFormat())); + Data->setAlignment(Align(8)); + + maybeSetComdat(Data, GV, Data->getName()); + + VTableDataMap[GV] = Data; + + ReferencedVTables.push_back(GV); + + // VTable is used by runtime but not referenced by other + // sections. Conservatively mark it linker retained. 
+ UsedVars.push_back(Data); +} + GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc, InstrProfSectKind IPSK) { GlobalVariable *NamePtr = Inc->getName(); @@ -1633,6 +1753,31 @@ void InstrLowerer::emitNameData() { NamePtr->eraseFromParent(); } +void InstrLowerer::emitVTableNames() { + if (!EnableVTableValueProfiling || ReferencedVTables.empty()) + return; + + // Collect the PGO names of referenced vtables and compress them. + std::string CompressedVTableNames; + if (Error E = collectVTableStrings(ReferencedVTables, CompressedVTableNames, + DoInstrProfNameCompression)) { + report_fatal_error(Twine(toString(std::move(E))), false); + } + + auto &Ctx = M.getContext(); + auto *VTableNamesVal = ConstantDataArray::getString( + Ctx, StringRef(CompressedVTableNames), false /* AddNull */); + GlobalVariable *VTableNamesVar = + new GlobalVariable(M, VTableNamesVal->getType(), true /* constant */, + GlobalValue::PrivateLinkage, VTableNamesVal, + getInstrProfVTableNamesVarName()); + VTableNamesVar->setSection( + getInstrProfSectionName(IPSK_vname, TT.getObjectFormat())); + VTableNamesVar->setAlignment(Align(1)); + // Make VTableNames linker retained. + UsedVars.push_back(VTableNamesVar); +} + void InstrLowerer::emitRegistration() { if (!needsRuntimeRegistrationOfSectionRange(TT)) return; diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 50eccc69a38a0..98c6f8cbf5afe 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -327,6 +327,9 @@ extern cl::opt PGOViewCounts; // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= extern cl::opt ViewBlockFreqFuncName; +// Command line option to enable vtable value profiling. 
Defined in +// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling= +extern cl::opt EnableVTableValueProfiling; extern cl::opt ProfileCorrelate; } // namespace llvm @@ -581,6 +584,8 @@ template class FuncPGOInstrumentation { NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); NumOfPGOBB += MST.bbInfoSize(); ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); + if (EnableVTableValueProfiling) + ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget); } else { NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); @@ -1775,6 +1780,15 @@ static bool InstrumentAllFunctions( // (before LTO/ThinLTO linking) to create these variables. if (!IsCS) createIRLevelProfileFlagVar(M, /*IsCS=*/false); + + Triple TT(M.getTargetTriple()); + LLVMContext &Ctx = M.getContext(); + if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling) + Ctx.diagnose(DiagnosticInfoPGOProfile( + M.getName().data(), + Twine("VTable value profiling is presently not " + "supported for non-ELF object formats"), + DS_Warning)); std::unordered_multimap ComdatMembers; collectComdatMembers(M, ComdatMembers); diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc index 3a129de1acd02..b47ef8523ea11 100644 --- a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc +++ b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc @@ -90,9 +90,38 @@ public: } }; +///--------------------- VirtualTableValueProfilingPlugin -------------------- +class VTableProfilingPlugin { + Function &F; + +public: + static constexpr InstrProfValueKind Kind = IPVK_VTableTarget; + + VTableProfilingPlugin(Function &Fn, TargetLibraryInfo &TLI) : F(Fn) {} + + void run(std::vector &Candidates) { + std::vector Result = findVTableAddrs(F); + for (Instruction *I : Result) { + Instruction *InsertPt = I->getNextNonDebugInstruction(); + // When finding an 
insertion point, keep PHI and EH pad instructions + // before vp intrinsics. This is similar to + // `BasicBlock::getFirstInsertionPt`. + while (InsertPt && (dyn_cast(InsertPt) || InsertPt->isEHPad())) + InsertPt = InsertPt->getNextNonDebugInstruction(); + // Skip instrumenting the value if InsertPt is the last instruction. + // FIXME: Set InsertPt to the end of basic block to instrument the value + // if InsertPt is the last instruction. + if (InsertPt == nullptr) + continue; + + Instruction *AnnotatedInst = I; + Candidates.emplace_back(CandidateInfo{I, InsertPt, AnnotatedInst}); + } + } +}; + ///----------------------- Registration of the plugins ------------------------- /// For now, registering a plugin with the ValueProfileCollector is done by /// adding the plugin type to the VP_PLUGIN_LIST macro. -#define VP_PLUGIN_LIST \ - MemIntrinsicPlugin, \ - IndirectCallPromotionPlugin +#define VP_PLUGIN_LIST \ + MemIntrinsicPlugin, IndirectCallPromotionPlugin, VTableProfilingPlugin diff --git a/llvm/test/Transforms/PGOProfile/vtable_prof_unsupported.ll b/llvm/test/Transforms/PGOProfile/vtable_prof_unsupported.ll new file mode 100644 index 0000000000000..f72a20fdc71a6 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/vtable_prof_unsupported.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -passes=pgo-instr-gen -enable-vtable-value-profiling -S 2>&1 | FileCheck %s + +; Test that unsupported warning is emitted for non-ELF object files.
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx14.0.0" + +; CHECK: warning: {{.*}} VTable value profiling is presently not supported for non-ELF object formats + +@_ZTV4Base = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base4funcEi] }, !type !0, !type !1 +@_ZTV7Derived = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived4funcEi] }, !type !0, !type !1, !type !2, !type !3 + +@llvm.compiler.used = appending global [2 x ptr] [ptr @_ZTV4Base, ptr @_ZTV7Derived], section "llvm.metadata" + +define i32 @_Z4funci(i32 %a) { +entry: + %call = call ptr @_Z10createTypev() + %vtable = load ptr, ptr %call + %0 = call i1 @llvm.public.type.test(ptr %vtable, metadata !"_ZTS7Derived") + call void @llvm.assume(i1 %0) + %1 = load ptr, ptr %vtable + %call1 = call i32 %1(ptr %call, i32 %a) + ret i32 %call1 +} + +declare ptr @_Z10createTypev() +declare i1 @llvm.public.type.test(ptr, metadata) +declare void @llvm.assume(i1) +declare i32 @_ZN4Base4funcEi(ptr, i32) +declare i32 @_ZN7Derived4funcEi(ptr , i32) + +!0 = !{i64 16, !"_ZTS4Base"} +!1 = !{i64 16, !"_ZTSM4BaseFiiE.virtual"} +!2 = !{i64 16, !"_ZTS7Derived"} +!3 = !{i64 16, !"_ZTSM7DerivedFiiE.virtual"} diff --git a/llvm/test/Transforms/PGOProfile/vtable_profile.ll b/llvm/test/Transforms/PGOProfile/vtable_profile.ll new file mode 100644 index 0000000000000..a8440031e1493 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/vtable_profile.ll @@ -0,0 +1,98 @@ +; RUN: opt < %s -passes=pgo-instr-gen -enable-vtable-value-profiling -S 2>&1 | FileCheck %s --check-prefix=GEN --implicit-check-not="VTable value profiling is presently not supported" +; RUN: opt < %s -passes=pgo-instr-gen,instrprof -enable-vtable-value-profiling -S 2>&1 | FileCheck %s --check-prefix=LOWER --implicit-check-not="VTable value profiling is presently not supported" + +; __llvm_prf_vnm stores zlib-compressed vtable names. 
+; REQUIRES: zlib + +source_filename = "vtable_local.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; The test IR is generated based on the following C++ program. +; Base1 has external linkage and Base2 has local linkage. +; class Derived uses multiple inheritance so its virtual table +; global variable contains two vtables. func1 is loaded from +; the vtable compatible with class Base1, and func2 is loaded +; from the vtable compatible with class Base2. + +; class Base1 { +; public: +; virtual int func1(int a) ; +; }; +; +; namespace { +; class Base2 { +; public: +; __attribute__((noinline)) virtual int func2(int a) { +; return a; +; } +; }; +; } + +; class Derived : public Base1, public Base2 { +; public: +; Derived(int c) : v(c) {} +; private: +; int v; +; }; +; +; Derived* createType(); + +; int func(int a) { +; Derived* d = createType(); +; return d->func2(a) + d->func1(a); +; } + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@_ZTV7Derived = constant { [3 x ptr], [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base15func1Ei], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr @_ZN12_GLOBAL__N_15Base25func2Ei] }, !type !0, !type !3, !type !6, !type !8, !type !10 +@_ZTV5Base1 = available_externally constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base15func1Ei] }, !type !0 +@_ZTVN12_GLOBAL__N_15Base2E = internal constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN12_GLOBAL__N_15Base25func2Ei] }, !type !11, !type !8; !vcall_visibility !12 +@llvm.compiler.used = appending global [1 x ptr] [ptr @_ZTV5Base1], section "llvm.metadata" + +; GEN: __llvm_profile_raw_version = comdat any +; GEN: __llvm_profile_raw_version = hidden constant i64 72057594037927946, comdat +; GEN: __profn__Z4funci = private constant [8 x i8] c"_Z4funci" + 
+; LOWER: $__profvt__ZTV7Derived = comdat nodeduplicate +; LOWER: $"__profvt_vtable_local.ll;_ZTVN12_GLOBAL__N_15Base2E" = comdat nodeduplicate +; LOWER: @__profvt__ZTV7Derived = global { i64, ptr, i32 } { i64 -4576307468236080025, ptr @_ZTV7Derived, i32 48 }, section "__llvm_prf_vtab", comdat, align 8 +; LOWER: @"__profvt_vtable_local.ll;_ZTVN12_GLOBAL__N_15Base2E" = internal global { i64, ptr, i32 } { i64 1419990121885302679, ptr @_ZTVN12_GLOBAL__N_15Base2E, i32 24 }, section "__llvm_prf_vtab", comdat, align 8 +; LOWER: @__llvm_prf_vnm = private constant [64 x i8] c"7>x\DA\8B\8F\0A\093wI-\CA,KMa,+IL\CAI\8D\CF\C9ON\CC\D1\CB\C9\B1\8E\07J\FA\19\1A\C5\BB\FB\F8;9\FA\C4\C7\FB\C5\1B\9A:%\16\A7\1A\B9\02\00\19:\12o", section "__llvm_prf_vns", align 1 +; LOWER: @llvm.used = appending global [5 x ptr] [ptr @__profvt__ZTV7Derived, ptr @"__profvt_vtable_local.ll;_ZTVN12_GLOBAL__N_15Base2E", ptr @__llvm_prf_vnodes, ptr @__llvm_prf_nm, ptr @__llvm_prf_vnm], section "llvm.metadata" + +define i32 @_Z4funci(i32 %a) { +entry: + %call = call ptr @_Z10createTypev() + %add.ptr = getelementptr inbounds i8, ptr %call, i64 8 + %vtable = load ptr, ptr %add.ptr +; GEN: [[P1:%[0-9]+]] = ptrtoint ptr %vtable to i64 +; GEN: call void @llvm.instrprof.value.profile(ptr @__profn__Z4funci, i64 [[CFGHash:[0-9]+]], i64 [[P1]], i32 2, i32 0) +; LOWER: [[P1:%[0-9]+]] = ptrtoint ptr %vtable to i64 +; LOWER: call void @__llvm_profile_instrument_target(i64 [[P1]], ptr @__profd__Z4funci, i32 2) + %vfunc1 = load ptr, ptr %vtable + %call1 = call i32 %vfunc1(ptr %add.ptr, i32 %a) + %vtable2 = load ptr, ptr %call +; GEN: [[P2:%[0-9]+]] = ptrtoint ptr %vtable2 to i64 +; GEN: call void @llvm.instrprof.value.profile(ptr @__profn__Z4funci, i64 [[CFGHash]], i64 [[P2]], i32 2, i32 1) +; LOWER: [[P2:%[0-9]+]] = ptrtoint ptr %vtable2 to i64 +; LOWER: call void @__llvm_profile_instrument_target(i64 [[P2]], ptr @__profd__Z4funci, i32 3) + %vfunc2 = load ptr, ptr %vtable2 + %call4 = call i32 %vfunc2(ptr %call, i32 %a) 
+ %add = add nsw i32 %call1, %call4 + ret i32 %add +} + +declare ptr @_Z10createTypev() +declare i32 @_ZN12_GLOBAL__N_15Base25func2Ei(ptr %this, i32 %a) +declare i32 @_ZN5Base15func1Ei(ptr, i32) + +!0 = !{i64 16, !"_ZTS5Base1"} +!3 = !{i64 16, !"_ZTS7Derived"} +!6 = !{i64 40, !7} +!7 = distinct !{} +!8 = !{i64 16, !9} +!9 = distinct !{} +!10 = !{i64 40, !9} +!11 = !{i64 16, !7} diff --git a/llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof.proftext b/llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof.proftext new file mode 100644 index 0000000000000..372f9f97b1645 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof.proftext @@ -0,0 +1,74 @@ +# IR level Instrumentation Flag +:ir +_Z10createTypei +# Func Hash: +146835647075900052 +# Num Counters: +2 +# Counter Values: +750 +250 + +_ZN8Derived15func1Eii +# Func Hash: +742261418966908927 +# Num Counters: +1 +# Counter Values: +250 + +_ZN8Derived15func2Eii +# Func Hash: +742261418966908927 +# Num Counters: +1 +# Counter Values: +250 + +main +# Func Hash: +1124236338992350536 +# Num Counters: +2 +# Counter Values: +1000 +1 +# Num Value Kinds: +2 +# ValueKind = IPVK_IndirectCallTarget: +0 +# NumValueSites: +2 +2 +vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func1Eii:750 +_ZN8Derived15func1Eii:250 +2 +vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func2Eii:750 +_ZN8Derived15func2Eii:250 +# ValueKind = IPVK_VTableTarget: +2 +# NumValueSites: +2 +2 +vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E:750 +_ZTV8Derived1:250 +2 +vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E:750 +_ZTV8Derived1:250 + +vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func1Eii +# Func Hash: +742261418966908927 +# Num Counters: +1 +# Counter Values: +750 + +vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func2Eii +# Func Hash: +742261418966908927 +# Num Counters: +1 +# Counter Values: +750 + diff --git a/llvm/test/tools/llvm-profdata/vtable-value-prof.test b/llvm/test/tools/llvm-profdata/vtable-value-prof.test new file mode 100644 index 
0000000000000..378c2e11b236b --- /dev/null +++ b/llvm/test/tools/llvm-profdata/vtable-value-prof.test @@ -0,0 +1,83 @@ +; RUN: rm -rf %t && mkdir %t && cd %t + +; Generate indexed profiles from text profiles +RUN: llvm-profdata merge %S/Inputs/vtable-value-prof.proftext -o indexed.profdata + +; Show indexed profiles +RUN: llvm-profdata show --function=main --ic-targets --show-vtables indexed.profdata | FileCheck %s --check-prefix=INDEXED + +; Show text profiles +RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text %S/Inputs/vtable-value-prof.proftext | FileCheck %s --check-prefix=ICTEXT + +; Convert indexed profiles to its textual output and show it. +RUN: llvm-profdata merge --text -o text-from-indexed.proftext indexed.profdata +RUN: llvm-profdata show --function=main --ic-targets --show-vtables text-from-indexed.proftext | FileCheck %s --check-prefix=INDEXED +RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text text-from-indexed.proftext | FileCheck %s --check-prefix=ICTEXT + +INDEXED: Counters: +INDEXED-NEXT: main: +INDEXED-NEXT: Hash: 0x0f9a16fe6d398548 +INDEXED-NEXT: Counters: 2 +INDEXED-NEXT: Indirect Call Site Count: 2 +INDEXED-NEXT: Number of instrumented vtables: 2 +INDEXED-NEXT: Indirect Target Results: +INDEXED-NEXT: [ 0, {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func1Eii, 750 ] (75.00%) +INDEXED-NEXT: [ 0, _ZN8Derived15func1Eii, 250 ] (25.00%) +INDEXED-NEXT: [ 1, {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func2Eii, 750 ] (75.00%) +INDEXED-NEXT: [ 1, _ZN8Derived15func2Eii, 250 ] (25.00%) +INDEXED-NEXT: VTable Results: +INDEXED-NEXT: [ 0, {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%) +INDEXED-NEXT: [ 0, _ZTV8Derived1, 250 ] (25.00%) +INDEXED-NEXT: [ 1, {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%) +INDEXED-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%) +INDEXED-NEXT: Instrumentation level: IR entry_first = 0 +INDEXED-NEXT: Functions shown: 1 +INDEXED-NEXT: Total 
functions: 6 +INDEXED-NEXT: Maximum function count: 1000 +INDEXED-NEXT: Maximum internal block count: 250 +INDEXED-NEXT: Statistics for indirect call sites profile: +INDEXED-NEXT: Total number of sites: 2 +INDEXED-NEXT: Total number of sites with values: 2 +INDEXED-NEXT: Total number of profiled values: 4 +INDEXED-NEXT: Value sites histogram: +INDEXED-NEXT: NumTargets, SiteCount +INDEXED-NEXT: 2, 2 +INDEXED-NEXT: Statistics for vtable profile: +INDEXED-NEXT: Total number of sites: 2 +INDEXED-NEXT: Total number of sites with values: 2 +INDEXED-NEXT: Total number of profiled values: 4 +INDEXED-NEXT: Value sites histogram: +INDEXED-NEXT: NumTargets, SiteCount +INDEXED-NEXT: 2, 2 + +ICTEXT: :ir +ICTEXT: main +ICTEXT: # Func Hash: +ICTEXT: 1124236338992350536 +ICTEXT: # Num Counters: +ICTEXT: 2 +ICTEXT: # Counter Values: +ICTEXT: 1000 +ICTEXT: 1 +ICTEXT: # Num Value Kinds: +ICTEXT: 2 +ICTEXT: # ValueKind = IPVK_IndirectCallTarget: +ICTEXT: 0 +ICTEXT: # NumValueSites: +ICTEXT: 2 +ICTEXT: 2 +ICTEXT: {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func1Eii:750 +ICTEXT: _ZN8Derived15func1Eii:250 +ICTEXT: 2 +ICTEXT: {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func2Eii:750 +ICTEXT: _ZN8Derived15func2Eii:250 +ICTEXT: # ValueKind = IPVK_VTableTarget: +ICTEXT: 2 +ICTEXT: # NumValueSites: +ICTEXT: 2 +ICTEXT: 2 +ICTEXT: {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E:750 +ICTEXT: _ZTV8Derived1:250 +ICTEXT: 2 +ICTEXT: {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E:750 +ICTEXT: _ZTV8Derived1:250 diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 3a7bd061d3d23..960ef5efed904 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -354,6 +354,9 @@ cl::opt ShowIndirectCallTargets( "ic-targets", cl::init(false), cl::desc("Show indirect call site target values for shown functions"), cl::sub(ShowSubcommand)); +cl::opt ShowVTables("show-vtables", cl::init(false), + 
cl::desc("Show vtable names for shown functions"), + cl::sub(ShowSubcommand)); cl::opt ShowMemOPSizes( "memop-sizes", cl::init(false), cl::desc("Show the profiled sizes of the memory intrinsic calls " @@ -731,6 +734,13 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, }); } + const InstrProfSymtab &symtab = Reader->getSymtab(); + const auto &VTableNames = symtab.getVTableNames(); + + for (const auto &kv : VTableNames) { + WC->Writer.addVTableName(kv.getKey()); + } + if (Reader->hasTemporalProfile()) { auto &Traces = Reader->getTemporalProfTraces(Input.Weight); if (!Traces.empty()) @@ -2826,6 +2836,10 @@ static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) { OS << " Indirect Call Site Count: " << Func.getNumValueSites(IPVK_IndirectCallTarget) << "\n"; + if (ShowVTables) + OS << " Number of instrumented vtables: " + << Func.getNumValueSites(IPVK_VTableTarget) << "\n"; + uint32_t NumMemOPCalls = Func.getNumValueSites(IPVK_MemOPSize); if (ShowMemOPSizes && NumMemOPCalls > 0) OS << " Number of Memory Intrinsics Calls: " << NumMemOPCalls @@ -2847,6 +2861,13 @@ static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) { &(Reader->getSymtab())); } + if (ShowVTables) { + OS << " VTable Results:\n"; + traverseAllValueSites(Func, IPVK_VTableTarget, + VPStats[IPVK_VTableTarget], OS, + &(Reader->getSymtab())); + } + if (ShowMemOPSizes && NumMemOPCalls > 0) { OS << " Memory Intrinsic Size Results:\n"; traverseAllValueSites(Func, IPVK_MemOPSize, VPStats[IPVK_MemOPSize], OS, @@ -2895,6 +2916,11 @@ static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) { VPStats[IPVK_IndirectCallTarget]); } + if (ShownFunctions && ShowVTables) { + OS << "Statistics for vtable profile:\n"; + showValueSitesStats(OS, IPVK_VTableTarget, VPStats[IPVK_VTableTarget]); + } + if (ShownFunctions && ShowMemOPSizes) { OS << "Statistics for memory intrinsic calls sizes profile:\n"; showValueSitesStats(OS, IPVK_MemOPSize, VPStats[IPVK_MemOPSize]); 
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp index c9323420bda79..732f8fd792f8d 100644 --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -638,32 +638,78 @@ TEST_F(InstrProfTest, test_irpgo_read_deprecated_names) { Succeeded()); } +// callee1 to callee6 are from vtable1 to vtable6 respectively. static const char callee1[] = "callee1"; static const char callee2[] = "callee2"; static const char callee3[] = "callee3"; static const char callee4[] = "callee4"; static const char callee5[] = "callee5"; static const char callee6[] = "callee6"; +// callee7 and callee8 are not from any vtables. +static const char callee7[] = "callee7"; +static const char callee8[] = "callee8"; +// 'callee' is primarily used to create multiple-element vtables. +static const char callee[] = "callee"; +static const uint64_t vtable1[] = {uint64_t(callee), uint64_t(callee1)}; +static const uint64_t vtable2[] = {uint64_t(callee2), uint64_t(callee)}; +static const uint64_t vtable3[] = { + uint64_t(callee), + uint64_t(callee3), +}; +static const uint64_t vtable4[] = {uint64_t(callee4), uint64_t(callee)}; +static const uint64_t vtable5[] = {uint64_t(callee5), uint64_t(callee)}; +static const uint64_t vtable6[] = {uint64_t(callee6), uint64_t(callee)}; + +// Returns the address of callee with a numbered suffix in vtable. +static uint64_t getCalleeAddress(const uint64_t *vtableAddr) { + uint64_t CalleeAddr; + // Callee with a numbered suffix is the 2nd element in vtable1 and vtable3, + // and the 1st element in the rest of vtables. 
+ if (vtableAddr == vtable1 || vtableAddr == vtable3) + CalleeAddr = uint64_t(vtableAddr) + 8; + else + CalleeAddr = uint64_t(vtableAddr); + return CalleeAddr; +} -TEST_P(InstrProfReaderWriterTest, icall_data_read_write) { +TEST_P(InstrProfReaderWriterTest, icall_and_vtable_data_read_write) { NamedInstrProfRecord Record1("caller", 0x1234, {1, 2}); - // 4 value sites. - Record1.reserveSites(IPVK_IndirectCallTarget, 4); - InstrProfValueData VD0[] = { - {(uint64_t)callee1, 1}, {(uint64_t)callee2, 2}, {(uint64_t)callee3, 3}}; - Record1.addValueData(IPVK_IndirectCallTarget, 0, VD0, 3, nullptr); - // No value profile data at the second site. - Record1.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr); - InstrProfValueData VD2[] = {{(uint64_t)callee1, 1}, {(uint64_t)callee2, 2}}; - Record1.addValueData(IPVK_IndirectCallTarget, 2, VD2, 2, nullptr); - InstrProfValueData VD3[] = {{(uint64_t)callee1, 1}}; - Record1.addValueData(IPVK_IndirectCallTarget, 3, VD3, 1, nullptr); + // 4 indirect call value sites. + { + Record1.reserveSites(IPVK_IndirectCallTarget, 4); + InstrProfValueData VD0[] = { + {(uint64_t)callee1, 1}, {(uint64_t)callee2, 2}, {(uint64_t)callee3, 3}}; + Record1.addValueData(IPVK_IndirectCallTarget, 0, VD0, 3, nullptr); + // No value profile data at the second site. + Record1.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr); + InstrProfValueData VD2[] = {{(uint64_t)callee1, 1}, {(uint64_t)callee2, 2}}; + Record1.addValueData(IPVK_IndirectCallTarget, 2, VD2, 2, nullptr); + InstrProfValueData VD3[] = {{(uint64_t)callee7, 1}, {(uint64_t)callee8, 2}}; + Record1.addValueData(IPVK_IndirectCallTarget, 3, VD3, 2, nullptr); + } + + // 2 vtable value sites. 
+ { + InstrProfValueData VD0[] = { + {getCalleeAddress(vtable1), 1}, + {getCalleeAddress(vtable2), 2}, + {getCalleeAddress(vtable3), 3}, + }; + InstrProfValueData VD2[] = { + {getCalleeAddress(vtable1), 1}, + {getCalleeAddress(vtable2), 2}, + }; + Record1.addValueData(IPVK_VTableTarget, 0, VD0, 3, nullptr); + Record1.addValueData(IPVK_VTableTarget, 2, VD2, 2, nullptr); + } Writer.addRecord(std::move(Record1), getProfWeight(), Err); Writer.addRecord({"callee1", 0x1235, {3, 4}}, Err); Writer.addRecord({"callee2", 0x1235, {3, 4}}, Err); Writer.addRecord({"callee3", 0x1235, {3, 4}}, Err); + Writer.addRecord({"callee7", 0x1235, {3, 4}}, Err); + Writer.addRecord({"callee8", 0x1235, {3, 4}}, Err); // Set writer value prof data endianness. Writer.setValueProfDataEndianness(getEndianness()); @@ -676,24 +722,63 @@ TEST_P(InstrProfReaderWriterTest, icall_data_read_write) { Expected R = Reader->getInstrProfRecord("caller", 0x1234); ASSERT_THAT_ERROR(R.takeError(), Succeeded()); + + // Test the number of instrumented indirect call sites and the number of + // profiled values at each site. ASSERT_EQ(4U, R->getNumValueSites(IPVK_IndirectCallTarget)); EXPECT_EQ(3U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 0)); EXPECT_EQ(0U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 1)); EXPECT_EQ(2U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 2)); - EXPECT_EQ(1U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 3)); + EXPECT_EQ(2U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 3)); + + // Test the number of instrumented vtable sites and the number of profiled + // values at each site. + ASSERT_EQ(R->getNumValueSites(IPVK_VTableTarget), 2U); + EXPECT_EQ(R->getNumValueDataForSite(IPVK_VTableTarget, 0), 3U); + EXPECT_EQ(R->getNumValueDataForSite(IPVK_VTableTarget, 1), 2U); + + // First indirect site. 
+ { + uint64_t TotalC; + auto VD = R->getValueForSite(IPVK_IndirectCallTarget, 0, &TotalC); + + EXPECT_EQ(VD[0].Count, 3U * getProfWeight()); + EXPECT_EQ(VD[1].Count, 2U * getProfWeight()); + EXPECT_EQ(VD[2].Count, 1U * getProfWeight()); + EXPECT_EQ(TotalC, 6U * getProfWeight()); + + EXPECT_STREQ((const char *)VD[0].Value, "callee3"); + EXPECT_STREQ((const char *)VD[1].Value, "callee2"); + EXPECT_STREQ((const char *)VD[2].Value, "callee1"); + } - uint64_t TotalC; - std::unique_ptr VD = - R->getValueForSite(IPVK_IndirectCallTarget, 0, &TotalC); + // First vtable site. + { + uint64_t TotalC; + auto VD = R->getValueForSite(IPVK_VTableTarget, 0, &TotalC); + + EXPECT_EQ(VD[0].Count, 3U * getProfWeight()); + EXPECT_EQ(VD[1].Count, 2U * getProfWeight()); + EXPECT_EQ(VD[2].Count, 1U * getProfWeight()); + EXPECT_EQ(TotalC, 6U * getProfWeight()); + + EXPECT_EQ(VD[0].Value, getCalleeAddress(vtable3)); + EXPECT_EQ(VD[1].Value, getCalleeAddress(vtable2)); + EXPECT_EQ(VD[2].Value, getCalleeAddress(vtable1)); + } + + // Second vtable site. 
+ { + uint64_t TotalC; + auto VD = R->getValueForSite(IPVK_VTableTarget, 1, &TotalC); - EXPECT_EQ(3U * getProfWeight(), VD[0].Count); - EXPECT_EQ(2U * getProfWeight(), VD[1].Count); - EXPECT_EQ(1U * getProfWeight(), VD[2].Count); - EXPECT_EQ(6U * getProfWeight(), TotalC); + EXPECT_EQ(VD[0].Count, 2U * getProfWeight()); + EXPECT_EQ(VD[1].Count, 1U * getProfWeight()); + EXPECT_EQ(TotalC, 3U * getProfWeight()); - EXPECT_EQ(StringRef((const char *)VD[0].Value, 7), StringRef("callee3")); - EXPECT_EQ(StringRef((const char *)VD[1].Value, 7), StringRef("callee2")); - EXPECT_EQ(StringRef((const char *)VD[2].Value, 7), StringRef("callee1")); + EXPECT_EQ(VD[0].Value, getCalleeAddress(vtable2)); + EXPECT_EQ(VD[1].Value, getCalleeAddress(vtable1)); + } } INSTANTIATE_TEST_SUITE_P( @@ -801,33 +886,53 @@ TEST_P(MaybeSparseInstrProfTest, annotate_vp_data) { ASSERT_EQ(1U, ValueData[3].Count); } -TEST_P(MaybeSparseInstrProfTest, icall_data_merge) { +TEST_P(MaybeSparseInstrProfTest, icall_and_vtable_data_merge) { static const char caller[] = "caller"; NamedInstrProfRecord Record11(caller, 0x1234, {1, 2}); NamedInstrProfRecord Record12(caller, 0x1234, {1, 2}); - // 5 value sites. - Record11.reserveSites(IPVK_IndirectCallTarget, 5); - InstrProfValueData VD0[] = {{uint64_t(callee1), 1}, - {uint64_t(callee2), 2}, - {uint64_t(callee3), 3}, - {uint64_t(callee4), 4}}; - Record11.addValueData(IPVK_IndirectCallTarget, 0, VD0, 4, nullptr); + // 5 value sites for indirect calls. + { + Record11.reserveSites(IPVK_IndirectCallTarget, 5); + InstrProfValueData VD0[] = {{uint64_t(callee1), 1}, + {uint64_t(callee2), 2}, + {uint64_t(callee3), 3}, + {uint64_t(callee4), 4}}; + Record11.addValueData(IPVK_IndirectCallTarget, 0, VD0, 4, nullptr); - // No value profile data at the second site. - Record11.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr); + // No value profile data at the second site. 
+ Record11.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr); - InstrProfValueData VD2[] = { - {uint64_t(callee1), 1}, {uint64_t(callee2), 2}, {uint64_t(callee3), 3}}; - Record11.addValueData(IPVK_IndirectCallTarget, 2, VD2, 3, nullptr); + InstrProfValueData VD2[] = { + {uint64_t(callee1), 1}, {uint64_t(callee2), 2}, {uint64_t(callee3), 3}}; + Record11.addValueData(IPVK_IndirectCallTarget, 2, VD2, 3, nullptr); - InstrProfValueData VD3[] = {{uint64_t(callee1), 1}}; - Record11.addValueData(IPVK_IndirectCallTarget, 3, VD3, 1, nullptr); + InstrProfValueData VD3[] = {{uint64_t(callee7), 1}, {uint64_t(callee8), 2}}; + Record11.addValueData(IPVK_IndirectCallTarget, 3, VD3, 2, nullptr); - InstrProfValueData VD4[] = {{uint64_t(callee1), 1}, - {uint64_t(callee2), 2}, - {uint64_t(callee3), 3}}; - Record11.addValueData(IPVK_IndirectCallTarget, 4, VD4, 3, nullptr); + InstrProfValueData VD4[] = { + {uint64_t(callee1), 1}, {uint64_t(callee2), 2}, {uint64_t(callee3), 3}}; + Record11.addValueData(IPVK_IndirectCallTarget, 4, VD4, 3, nullptr); + } + // 3 value sites for vtables. + { + Record11.reserveSites(IPVK_VTableTarget, 3); + InstrProfValueData VD0[] = {{getCalleeAddress(vtable1), 1}, + {getCalleeAddress(vtable2), 2}, + {getCalleeAddress(vtable3), 3}, + {getCalleeAddress(vtable4), 4}}; + Record11.addValueData(IPVK_VTableTarget, 0, VD0, 4, nullptr); + + InstrProfValueData VD2[] = {{getCalleeAddress(vtable1), 1}, + {getCalleeAddress(vtable2), 2}, + {getCalleeAddress(vtable3), 3}}; + Record11.addValueData(IPVK_VTableTarget, 1, VD2, 3, nullptr); + + InstrProfValueData VD4[] = {{getCalleeAddress(vtable1), 1}, + {getCalleeAddress(vtable2), 2}, + {getCalleeAddress(vtable3), 3}}; + Record11.addValueData(IPVK_VTableTarget, 3, VD4, 3, nullptr); + } // A different record for the same caller. 
Record12.reserveSites(IPVK_IndirectCallTarget, 5); @@ -843,11 +948,28 @@ TEST_P(MaybeSparseInstrProfTest, icall_data_merge) { Record12.addValueData(IPVK_IndirectCallTarget, 3, nullptr, 0, nullptr); - InstrProfValueData VD42[] = {{uint64_t(callee1), 1}, - {uint64_t(callee2), 2}, - {uint64_t(callee3), 3}}; + InstrProfValueData VD42[] = { + {uint64_t(callee1), 1}, {uint64_t(callee2), 2}, {uint64_t(callee3), 3}}; Record12.addValueData(IPVK_IndirectCallTarget, 4, VD42, 3, nullptr); + // 3 value sites for vtables. + { + Record12.reserveSites(IPVK_VTableTarget, 3); + InstrProfValueData VD0[] = {{getCalleeAddress(vtable2), 5}, + {getCalleeAddress(vtable3), 3}}; + Record12.addValueData(IPVK_VTableTarget, 0, VD0, 2, nullptr); + + InstrProfValueData VD2[] = {{getCalleeAddress(vtable2), 1}, + {getCalleeAddress(vtable3), 3}, + {getCalleeAddress(vtable4), 4}}; + Record12.addValueData(IPVK_VTableTarget, 1, VD2, 3, nullptr); + + InstrProfValueData VD4[] = {{getCalleeAddress(vtable1), 1}, + {getCalleeAddress(vtable2), 2}, + {getCalleeAddress(vtable3), 3}}; + Record12.addValueData(IPVK_VTableTarget, 3, VD4, 3, nullptr); + } + Writer.addRecord(std::move(Record11), Err); // Merge profile data. Writer.addRecord(std::move(Record12), Err); @@ -857,53 +979,95 @@ TEST_P(MaybeSparseInstrProfTest, icall_data_merge) { Writer.addRecord({callee3, 0x1235, {3, 4}}, Err); Writer.addRecord({callee3, 0x1235, {3, 4}}, Err); Writer.addRecord({callee4, 0x1235, {3, 5}}, Err); + Writer.addRecord({callee7, 0x1235, {3, 5}}, Err); + Writer.addRecord({callee8, 0x1235, {3, 5}}, Err); auto Profile = Writer.writeBuffer(); readProfile(std::move(Profile)); + // Test the number of instrumented value sites and the number of profiled + // values for each site. Expected R = Reader->getInstrProfRecord("caller", 0x1234); EXPECT_THAT_ERROR(R.takeError(), Succeeded()); + // For indirect calls. 
ASSERT_EQ(5U, R->getNumValueSites(IPVK_IndirectCallTarget)); ASSERT_EQ(4U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 0)); ASSERT_EQ(0U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 1)); ASSERT_EQ(4U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 2)); - ASSERT_EQ(1U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 3)); + ASSERT_EQ(2U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 3)); ASSERT_EQ(3U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 4)); + // For vtables. + ASSERT_EQ(R->getNumValueSites(IPVK_VTableTarget), 3U); + ASSERT_EQ(R->getNumValueDataForSite(IPVK_VTableTarget, 0), 4U); + ASSERT_EQ(R->getNumValueDataForSite(IPVK_VTableTarget, 1), 4U); + ASSERT_EQ(R->getNumValueDataForSite(IPVK_VTableTarget, 2), 3U); + + // Test the merged values for indirect calls. + { + auto VD = R->getValueForSite(IPVK_IndirectCallTarget, 0); + EXPECT_STREQ((const char *)VD[0].Value, "callee2"); + EXPECT_EQ(VD[0].Count, 7U); + EXPECT_STREQ((const char *)VD[1].Value, "callee3"); + EXPECT_EQ(VD[1].Count, 6U); + EXPECT_STREQ((const char *)VD[2].Value, "callee4"); + EXPECT_EQ(VD[2].Count, 4U); + EXPECT_STREQ((const char *)VD[3].Value, "callee1"); + EXPECT_EQ(VD[3].Count, 1U); + + auto VD_2(R->getValueForSite(IPVK_IndirectCallTarget, 2)); + EXPECT_STREQ((const char *)VD_2[0].Value, "callee3"); + EXPECT_EQ(VD_2[0].Count, 6U); + EXPECT_STREQ((const char *)VD_2[1].Value, "callee4"); + EXPECT_EQ(VD_2[1].Count, 4U); + EXPECT_STREQ((const char *)VD_2[2].Value, "callee2"); + EXPECT_EQ(VD_2[2].Count, 3U); + EXPECT_STREQ((const char *)VD_2[3].Value, "callee1"); + EXPECT_EQ(VD_2[3].Count, 1U); + + auto VD_3(R->getValueForSite(IPVK_IndirectCallTarget, 3)); + EXPECT_STREQ((const char *)VD_3[0].Value, "callee8"); + EXPECT_EQ(VD_3[0].Count, 2U); + EXPECT_STREQ((const char *)VD_3[1].Value, "callee7"); + EXPECT_EQ(VD_3[1].Count, 1U); + + auto VD_4(R->getValueForSite(IPVK_IndirectCallTarget, 4)); + EXPECT_STREQ((const char *)VD_4[0].Value, "callee3"); + 
EXPECT_EQ(VD_4[0].Count, 6U); + EXPECT_STREQ((const char *)VD_4[1].Value, "callee2"); + EXPECT_EQ(VD_4[1].Count, 4U); + EXPECT_STREQ((const char *)VD_4[2].Value, "callee1"); + EXPECT_EQ(VD_4[2].Count, 2U); + } - std::unique_ptr VD = - R->getValueForSite(IPVK_IndirectCallTarget, 0); - ASSERT_EQ(StringRef((const char *)VD[0].Value, 7), StringRef("callee2")); - ASSERT_EQ(7U, VD[0].Count); - ASSERT_EQ(StringRef((const char *)VD[1].Value, 7), StringRef("callee3")); - ASSERT_EQ(6U, VD[1].Count); - ASSERT_EQ(StringRef((const char *)VD[2].Value, 7), StringRef("callee4")); - ASSERT_EQ(4U, VD[2].Count); - ASSERT_EQ(StringRef((const char *)VD[3].Value, 7), StringRef("callee1")); - ASSERT_EQ(1U, VD[3].Count); - - std::unique_ptr VD_2( - R->getValueForSite(IPVK_IndirectCallTarget, 2)); - ASSERT_EQ(StringRef((const char *)VD_2[0].Value, 7), StringRef("callee3")); - ASSERT_EQ(6U, VD_2[0].Count); - ASSERT_EQ(StringRef((const char *)VD_2[1].Value, 7), StringRef("callee4")); - ASSERT_EQ(4U, VD_2[1].Count); - ASSERT_EQ(StringRef((const char *)VD_2[2].Value, 7), StringRef("callee2")); - ASSERT_EQ(3U, VD_2[2].Count); - ASSERT_EQ(StringRef((const char *)VD_2[3].Value, 7), StringRef("callee1")); - ASSERT_EQ(1U, VD_2[3].Count); - - std::unique_ptr VD_3( - R->getValueForSite(IPVK_IndirectCallTarget, 3)); - ASSERT_EQ(StringRef((const char *)VD_3[0].Value, 7), StringRef("callee1")); - ASSERT_EQ(1U, VD_3[0].Count); - - std::unique_ptr VD_4( - R->getValueForSite(IPVK_IndirectCallTarget, 4)); - ASSERT_EQ(StringRef((const char *)VD_4[0].Value, 7), StringRef("callee3")); - ASSERT_EQ(6U, VD_4[0].Count); - ASSERT_EQ(StringRef((const char *)VD_4[1].Value, 7), StringRef("callee2")); - ASSERT_EQ(4U, VD_4[1].Count); - ASSERT_EQ(StringRef((const char *)VD_4[2].Value, 7), StringRef("callee1")); - ASSERT_EQ(2U, VD_4[2].Count); + // Test the merged values for vtables + { + auto VD0 = R->getValueForSite(IPVK_VTableTarget, 0); + EXPECT_EQ(VD0[0].Value, getCalleeAddress(vtable2)); + EXPECT_EQ(VD0[0].Count, 
7U); + EXPECT_EQ(VD0[1].Value, getCalleeAddress(vtable3)); + EXPECT_EQ(VD0[1].Count, 6U); + EXPECT_EQ(VD0[2].Value, getCalleeAddress(vtable4)); + EXPECT_EQ(VD0[2].Count, 4U); + EXPECT_EQ(VD0[3].Value, getCalleeAddress(vtable1)); + EXPECT_EQ(VD0[3].Count, 1U); + + auto VD1 = R->getValueForSite(IPVK_VTableTarget, 1); + EXPECT_EQ(VD1[0].Value, getCalleeAddress(vtable3)); + EXPECT_EQ(VD1[0].Count, 6U); + EXPECT_EQ(VD1[1].Value, getCalleeAddress(vtable4)); + EXPECT_EQ(VD1[1].Count, 4U); + EXPECT_EQ(VD1[2].Value, getCalleeAddress(vtable2)); + EXPECT_EQ(VD1[2].Count, 3U); + EXPECT_EQ(VD1[3].Value, getCalleeAddress(vtable1)); + EXPECT_EQ(VD1[3].Count, 1U); + + auto VD2 = R->getValueForSite(IPVK_VTableTarget, 2); + EXPECT_EQ(VD2[0].Value, getCalleeAddress(vtable3)); + EXPECT_EQ(VD2[0].Count, 6U); + EXPECT_EQ(VD2[1].Value, getCalleeAddress(vtable2)); + EXPECT_EQ(VD2[1].Count, 4U); + EXPECT_EQ(VD2[2].Value, getCalleeAddress(vtable1)); + EXPECT_EQ(VD2[2].Count, 2U); + } } struct ValueProfileMergeEdgeCaseTest @@ -1027,30 +1191,62 @@ INSTANTIATE_TEST_SUITE_P( EdgeCaseTest, ValueProfileMergeEdgeCaseTest, ::testing::Combine(::testing::Bool(), /* Sparse */ ::testing::Values(IPVK_IndirectCallTarget, - IPVK_MemOPSize) /* ValueKind */ + IPVK_MemOPSize, + IPVK_VTableTarget) /* ValueKind */ )); static void addValueProfData(InstrProfRecord &Record) { - Record.reserveSites(IPVK_IndirectCallTarget, 5); - InstrProfValueData VD0[] = {{uint64_t(callee1), 400}, - {uint64_t(callee2), 1000}, - {uint64_t(callee3), 500}, - {uint64_t(callee4), 300}, - {uint64_t(callee5), 100}}; - Record.addValueData(IPVK_IndirectCallTarget, 0, VD0, 5, nullptr); - InstrProfValueData VD1[] = {{uint64_t(callee5), 800}, - {uint64_t(callee3), 1000}, - {uint64_t(callee2), 2500}, - {uint64_t(callee1), 1300}}; - Record.addValueData(IPVK_IndirectCallTarget, 1, VD1, 4, nullptr); - InstrProfValueData VD2[] = {{uint64_t(callee6), 800}, - {uint64_t(callee3), 1000}, - {uint64_t(callee4), 5500}}; - 
Record.addValueData(IPVK_IndirectCallTarget, 2, VD2, 3, nullptr); - InstrProfValueData VD3[] = {{uint64_t(callee2), 1800}, - {uint64_t(callee3), 2000}}; - Record.addValueData(IPVK_IndirectCallTarget, 3, VD3, 2, nullptr); - Record.addValueData(IPVK_IndirectCallTarget, 4, nullptr, 0, nullptr); + // Add test data for indirect calls. + { + Record.reserveSites(IPVK_IndirectCallTarget, 6); + InstrProfValueData VD0[] = {{uint64_t(callee1), 400}, + {uint64_t(callee2), 1000}, + {uint64_t(callee3), 500}, + {uint64_t(callee4), 300}, + {uint64_t(callee5), 100}}; + Record.addValueData(IPVK_IndirectCallTarget, 0, VD0, 5, nullptr); + InstrProfValueData VD1[] = {{uint64_t(callee5), 800}, + {uint64_t(callee3), 1000}, + {uint64_t(callee2), 2500}, + {uint64_t(callee1), 1300}}; + Record.addValueData(IPVK_IndirectCallTarget, 1, VD1, 4, nullptr); + InstrProfValueData VD2[] = {{uint64_t(callee6), 800}, + {uint64_t(callee3), 1000}, + {uint64_t(callee4), 5500}}; + Record.addValueData(IPVK_IndirectCallTarget, 2, VD2, 3, nullptr); + InstrProfValueData VD3[] = {{uint64_t(callee2), 1800}, + {uint64_t(callee3), 2000}}; + Record.addValueData(IPVK_IndirectCallTarget, 3, VD3, 2, nullptr); + Record.addValueData(IPVK_IndirectCallTarget, 4, nullptr, 0, nullptr); + InstrProfValueData VD5[] = {{uint64_t(callee7), 1234}, + {uint64_t(callee8), 5678}}; + Record.addValueData(IPVK_IndirectCallTarget, 5, VD5, 2, nullptr); + } + + // Add test data for vtables + { + Record.reserveSites(IPVK_VTableTarget, 4); + InstrProfValueData VD0[] = { + {getCalleeAddress(vtable1), 400}, {getCalleeAddress(vtable2), 1000}, + {getCalleeAddress(vtable3), 500}, {getCalleeAddress(vtable4), 300}, + {getCalleeAddress(vtable5), 100}, + }; + InstrProfValueData VD1[] = {{getCalleeAddress(vtable5), 800}, + {getCalleeAddress(vtable3), 1000}, + {getCalleeAddress(vtable2), 2500}, + {getCalleeAddress(vtable1), 1300}}; + InstrProfValueData VD2[] = { + {getCalleeAddress(vtable6), 800}, + {getCalleeAddress(vtable3), 1000}, + 
{getCalleeAddress(vtable4), 5500}, + }; + InstrProfValueData VD3[] = {{getCalleeAddress(vtable2), 1800}, + {getCalleeAddress(vtable3), 2000}}; + Record.addValueData(IPVK_VTableTarget, 0, VD0, 5, nullptr); + Record.addValueData(IPVK_VTableTarget, 1, VD1, 4, nullptr); + Record.addValueData(IPVK_VTableTarget, 2, VD2, 3, nullptr); + Record.addValueData(IPVK_VTableTarget, 3, VD3, 2, nullptr); + } } TEST(ValueProfileReadWriteTest, value_prof_data_read_write) { @@ -1063,59 +1259,107 @@ TEST(ValueProfileReadWriteTest, value_prof_data_read_write) { VPData->deserializeTo(Record, nullptr); // Now read data from Record and sanity check the data - ASSERT_EQ(5U, Record.getNumValueSites(IPVK_IndirectCallTarget)); + ASSERT_EQ(6U, Record.getNumValueSites(IPVK_IndirectCallTarget)); ASSERT_EQ(5U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 0)); ASSERT_EQ(4U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 1)); ASSERT_EQ(3U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 2)); ASSERT_EQ(2U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 3)); ASSERT_EQ(0U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 4)); + ASSERT_EQ(2U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 5)); auto Cmp = [](const InstrProfValueData &VD1, const InstrProfValueData &VD2) { return VD1.Count > VD2.Count; }; + std::unique_ptr VD_0( Record.getValueForSite(IPVK_IndirectCallTarget, 0)); llvm::sort(&VD_0[0], &VD_0[5], Cmp); - ASSERT_EQ(StringRef((const char *)VD_0[0].Value, 7), StringRef("callee2")); - ASSERT_EQ(1000U, VD_0[0].Count); - ASSERT_EQ(StringRef((const char *)VD_0[1].Value, 7), StringRef("callee3")); - ASSERT_EQ(500U, VD_0[1].Count); - ASSERT_EQ(StringRef((const char *)VD_0[2].Value, 7), StringRef("callee1")); - ASSERT_EQ(400U, VD_0[2].Count); - ASSERT_EQ(StringRef((const char *)VD_0[3].Value, 7), StringRef("callee4")); - ASSERT_EQ(300U, VD_0[3].Count); - ASSERT_EQ(StringRef((const char *)VD_0[4].Value, 7), StringRef("callee5")); - ASSERT_EQ(100U, 
VD_0[4].Count); + EXPECT_STREQ((const char *)VD_0[0].Value, "callee2"); + EXPECT_EQ(1000U, VD_0[0].Count); + EXPECT_STREQ((const char *)VD_0[1].Value, "callee3"); + EXPECT_EQ(500U, VD_0[1].Count); + EXPECT_STREQ((const char *)VD_0[2].Value, "callee1"); + EXPECT_EQ(400U, VD_0[2].Count); + EXPECT_STREQ((const char *)VD_0[3].Value, "callee4"); + EXPECT_EQ(300U, VD_0[3].Count); + EXPECT_STREQ((const char *)VD_0[4].Value, "callee5"); + EXPECT_EQ(100U, VD_0[4].Count); std::unique_ptr VD_1( Record.getValueForSite(IPVK_IndirectCallTarget, 1)); llvm::sort(&VD_1[0], &VD_1[4], Cmp); - ASSERT_EQ(StringRef((const char *)VD_1[0].Value, 7), StringRef("callee2")); - ASSERT_EQ(2500U, VD_1[0].Count); - ASSERT_EQ(StringRef((const char *)VD_1[1].Value, 7), StringRef("callee1")); - ASSERT_EQ(1300U, VD_1[1].Count); - ASSERT_EQ(StringRef((const char *)VD_1[2].Value, 7), StringRef("callee3")); - ASSERT_EQ(1000U, VD_1[2].Count); - ASSERT_EQ(StringRef((const char *)VD_1[3].Value, 7), StringRef("callee5")); - ASSERT_EQ(800U, VD_1[3].Count); + EXPECT_STREQ((const char *)VD_1[0].Value, "callee2"); + EXPECT_EQ(VD_1[0].Count, 2500U); + EXPECT_STREQ((const char *)VD_1[1].Value, "callee1"); + EXPECT_EQ(VD_1[1].Count, 1300U); + EXPECT_STREQ((const char *)VD_1[2].Value, "callee3"); + EXPECT_EQ(VD_1[2].Count, 1000U); + EXPECT_STREQ((const char *)VD_1[3].Value, "callee5"); + EXPECT_EQ(VD_1[3].Count, 800U); std::unique_ptr VD_2( Record.getValueForSite(IPVK_IndirectCallTarget, 2)); llvm::sort(&VD_2[0], &VD_2[3], Cmp); - ASSERT_EQ(StringRef((const char *)VD_2[0].Value, 7), StringRef("callee4")); - ASSERT_EQ(5500U, VD_2[0].Count); - ASSERT_EQ(StringRef((const char *)VD_2[1].Value, 7), StringRef("callee3")); - ASSERT_EQ(1000U, VD_2[1].Count); - ASSERT_EQ(StringRef((const char *)VD_2[2].Value, 7), StringRef("callee6")); - ASSERT_EQ(800U, VD_2[2].Count); + EXPECT_STREQ((const char *)VD_2[0].Value, "callee4"); + EXPECT_EQ(VD_2[0].Count, 5500U); + EXPECT_STREQ((const char *)VD_2[1].Value, "callee3"); + 
EXPECT_EQ(VD_2[1].Count, 1000U); + EXPECT_STREQ((const char *)VD_2[2].Value, "callee6"); + EXPECT_EQ(VD_2[2].Count, 800U); std::unique_ptr VD_3( Record.getValueForSite(IPVK_IndirectCallTarget, 3)); llvm::sort(&VD_3[0], &VD_3[2], Cmp); - ASSERT_EQ(StringRef((const char *)VD_3[0].Value, 7), StringRef("callee3")); - ASSERT_EQ(2000U, VD_3[0].Count); - ASSERT_EQ(StringRef((const char *)VD_3[1].Value, 7), StringRef("callee2")); - ASSERT_EQ(1800U, VD_3[1].Count); + EXPECT_STREQ((const char *)VD_3[0].Value, "callee3"); + EXPECT_EQ(VD_3[0].Count, 2000U); + EXPECT_STREQ((const char *)VD_3[1].Value, "callee2"); + EXPECT_EQ(VD_3[1].Count, 1800U); + + ASSERT_EQ(Record.getNumValueSites(IPVK_VTableTarget), 4U); + ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 0), 5U); + ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 1), 4U); + ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 2), 3U); + ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 3), 2U); + + auto VD0(Record.getValueForSite(IPVK_VTableTarget, 0)); + llvm::sort(&VD0[0], &VD0[5], Cmp); + EXPECT_EQ(VD0[0].Value, getCalleeAddress(vtable2)); + EXPECT_EQ(VD0[0].Count, 1000U); + EXPECT_EQ(VD0[1].Value, getCalleeAddress(vtable3)); + EXPECT_EQ(VD0[1].Count, 500U); + EXPECT_EQ(VD0[2].Value, getCalleeAddress(vtable1)); + EXPECT_EQ(VD0[2].Count, 400U); + EXPECT_EQ(VD0[3].Value, getCalleeAddress(vtable4)); + EXPECT_EQ(VD0[3].Count, 300U); + EXPECT_EQ(VD0[4].Value, getCalleeAddress(vtable5)); + EXPECT_EQ(VD0[4].Count, 100U); + + auto VD1(Record.getValueForSite(IPVK_VTableTarget, 1)); + llvm::sort(&VD1[0], &VD1[4], Cmp); + EXPECT_EQ(VD1[0].Value, getCalleeAddress(vtable2)); + EXPECT_EQ(VD1[0].Count, 2500U); + EXPECT_EQ(VD1[1].Value, getCalleeAddress(vtable1)); + EXPECT_EQ(VD1[1].Count, 1300U); + EXPECT_EQ(VD1[2].Value, getCalleeAddress(vtable3)); + EXPECT_EQ(VD1[2].Count, 1000U); + EXPECT_EQ(VD1[3].Value, getCalleeAddress(vtable5)); + EXPECT_EQ(VD1[3].Count, 800U); + + auto 
VD2(Record.getValueForSite(IPVK_VTableTarget, 2)); + llvm::sort(&VD2[0], &VD2[3], Cmp); + EXPECT_EQ(VD2[0].Value, getCalleeAddress(vtable4)); + EXPECT_EQ(VD2[0].Count, 5500U); + EXPECT_EQ(VD2[1].Value, getCalleeAddress(vtable3)); + EXPECT_EQ(VD2[1].Count, 1000U); + EXPECT_EQ(VD2[2].Value, getCalleeAddress(vtable6)); + EXPECT_EQ(VD2[2].Count, 800U); + + auto VD3(Record.getValueForSite(IPVK_VTableTarget, 3)); + llvm::sort(&VD3[0], &VD3[2], Cmp); + EXPECT_EQ(VD3[0].Value, getCalleeAddress(vtable3)); + EXPECT_EQ(VD3[0].Count, 2000U); + EXPECT_EQ(VD3[1].Value, getCalleeAddress(vtable2)); + EXPECT_EQ(VD3[1].Count, 1800U); } TEST(ValueProfileReadWriteTest, symtab_mapping) { @@ -1132,27 +1376,121 @@ TEST(ValueProfileReadWriteTest, symtab_mapping) { Symtab.mapAddress(uint64_t(callee4), 0x4000ULL); // Missing mapping for callee5 + auto getVTableStartAddr = [](const uint64_t *vtable) -> uint64_t { + return uint64_t(vtable); + }; + auto getVTableEndAddr = [](const uint64_t *vtable) -> uint64_t { + return uint64_t(vtable) + 16; + }; + auto getVTableMidAddr = [](const uint64_t *vtable) -> uint64_t { + return uint64_t(vtable) + 8; + }; + // vtable1, vtable2, vtable3, vtable4 get mapped; vtable5, vtable6 are not + // mapped. 
+ Symtab.mapVTableAddress(getVTableStartAddr(vtable1), + getVTableEndAddr(vtable1), MD5Hash("vtable1")); + Symtab.mapVTableAddress(getVTableStartAddr(vtable2), + getVTableEndAddr(vtable2), MD5Hash("vtable2")); + Symtab.mapVTableAddress(getVTableStartAddr(vtable3), + getVTableEndAddr(vtable3), MD5Hash("vtable3")); + Symtab.mapVTableAddress(getVTableStartAddr(vtable4), + getVTableEndAddr(vtable4), MD5Hash("vtable4")); + VPData->deserializeTo(Record, &Symtab); // Now read data from Record and sanity check the data - ASSERT_EQ(5U, Record.getNumValueSites(IPVK_IndirectCallTarget)); - ASSERT_EQ(5U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 0)); + ASSERT_EQ(Record.getNumValueSites(IPVK_IndirectCallTarget), 6U); + ASSERT_EQ(Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 0), 5U); + + // Look up the value corresponding to the middle of a vtable in symtab and + // test that it's the hash of the name. + EXPECT_EQ(Symtab.getVTableHashFromAddress(getVTableMidAddr(vtable1)), + MD5Hash("vtable1")); + EXPECT_EQ(Symtab.getVTableHashFromAddress(getVTableMidAddr(vtable2)), + MD5Hash("vtable2")); + EXPECT_EQ(Symtab.getVTableHashFromAddress(getVTableMidAddr(vtable3)), + MD5Hash("vtable3")); + EXPECT_EQ(Symtab.getVTableHashFromAddress(getVTableMidAddr(vtable4)), + MD5Hash("vtable4")); auto Cmp = [](const InstrProfValueData &VD1, const InstrProfValueData &VD2) { return VD1.Count > VD2.Count; }; - std::unique_ptr VD_0( - Record.getValueForSite(IPVK_IndirectCallTarget, 0)); + auto VD_0(Record.getValueForSite(IPVK_IndirectCallTarget, 0)); llvm::sort(&VD_0[0], &VD_0[5], Cmp); ASSERT_EQ(VD_0[0].Value, 0x2000ULL); - ASSERT_EQ(1000U, VD_0[0].Count); + ASSERT_EQ(VD_0[0].Count, 1000U); ASSERT_EQ(VD_0[1].Value, 0x3000ULL); - ASSERT_EQ(500U, VD_0[1].Count); + ASSERT_EQ(VD_0[1].Count, 500U); ASSERT_EQ(VD_0[2].Value, 0x1000ULL); - ASSERT_EQ(400U, VD_0[2].Count); + ASSERT_EQ(VD_0[2].Count, 400U); // callee5 does not have a mapped value -- default to 0.
ASSERT_EQ(VD_0[4].Value, 0ULL); + + // Sanity check the vtable value data + ASSERT_EQ(Record.getNumValueSites(IPVK_VTableTarget), 4U); + + { + // The first vtable site. + auto VD(Record.getValueForSite(IPVK_VTableTarget, 0)); + ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 0), 5U); + llvm::sort(&VD[0], &VD[5], Cmp); + EXPECT_EQ(VD[0].Count, 1000U); + EXPECT_EQ(VD[0].Value, MD5Hash("vtable2")); + EXPECT_EQ(VD[1].Count, 500U); + EXPECT_EQ(VD[1].Value, MD5Hash("vtable3")); + EXPECT_EQ(VD[2].Value, MD5Hash("vtable1")); + EXPECT_EQ(VD[2].Count, 400U); + EXPECT_EQ(VD[3].Value, MD5Hash("vtable4")); + EXPECT_EQ(VD[3].Count, 300U); + + // vtable5 isn't mapped -- default to 0. + EXPECT_EQ(VD[4].Value, 0U); + EXPECT_EQ(VD[4].Count, 100U); + } + + { + // The second vtable site. + auto VD(Record.getValueForSite(IPVK_VTableTarget, 1)); + ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 1), 4U); + llvm::sort(&VD[0], &VD[4], Cmp); + EXPECT_EQ(VD[0].Value, MD5Hash("vtable2")); + EXPECT_EQ(VD[0].Count, 2500U); + EXPECT_EQ(VD[1].Value, MD5Hash("vtable1")); + EXPECT_EQ(VD[1].Count, 1300U); + + EXPECT_EQ(VD[2].Value, MD5Hash("vtable3")); + EXPECT_EQ(VD[2].Count, 1000U); + // vtable5 isn't mapped -- default to 0. + EXPECT_EQ(VD[3].Value, 0U); + EXPECT_EQ(VD[3].Count, 800U); + } + + { + // The third vtable site. + auto VD(Record.getValueForSite(IPVK_VTableTarget, 2)); + ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 2), 3U); + llvm::sort(&VD[0], &VD[3], Cmp); + EXPECT_EQ(VD[0].Count, 5500U); + EXPECT_EQ(VD[0].Value, MD5Hash("vtable4")); + EXPECT_EQ(VD[1].Count, 1000U); + EXPECT_EQ(VD[1].Value, MD5Hash("vtable3")); + // vtable6 isn't mapped -- default to 0. + EXPECT_EQ(VD[2].Value, 0U); + EXPECT_EQ(VD[2].Count, 800U); + } + + { + // The fourth vtable site. 
+ auto VD(Record.getValueForSite(IPVK_VTableTarget, 3)); + ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 3), 2U); + llvm::sort(&VD[0], &VD[2], Cmp); + EXPECT_EQ(VD[0].Count, 2000U); + EXPECT_EQ(VD[0].Value, MD5Hash("vtable3")); + EXPECT_EQ(VD[1].Count, 1800U); + EXPECT_EQ(VD[1].Value, MD5Hash("vtable2")); + } } TEST_P(MaybeSparseInstrProfTest, get_max_function_count) { @@ -1278,13 +1616,13 @@ TEST(SymtabTest, instr_prof_symtab_module_test) { std::string IRPGOName = getIRPGOFuncName(*F); auto IRPGOFuncName = ProfSymtab.getFuncOrVarName(IndexedInstrProf::ComputeHash(IRPGOName)); - EXPECT_EQ(StringRef(IRPGOName), IRPGOFuncName); - EXPECT_EQ(StringRef(Funcs[I]), getParsedIRPGOName(IRPGOFuncName).second); + EXPECT_EQ(IRPGOName, IRPGOFuncName); + EXPECT_EQ(Funcs[I], getParsedIRPGOName(IRPGOFuncName).second); // Ensure we can still read this old record name. std::string PGOName = getPGOFuncName(*F); auto PGOFuncName = ProfSymtab.getFuncOrVarName(IndexedInstrProf::ComputeHash(PGOName)); - EXPECT_EQ(StringRef(PGOName), PGOFuncName); + EXPECT_EQ(PGOName, PGOFuncName); EXPECT_THAT(PGOFuncName.str(), EndsWith(Funcs[I].str())); } }