Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[InstrProf] Add vtables with type metadata into symtab #81051

Merged
merged 9 commits into from
May 9, 2024
28 changes: 26 additions & 2 deletions llvm/include/llvm/ProfileData/InstrProf.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
Expand Down Expand Up @@ -470,6 +471,12 @@ class InstrProfSymtab {
// A map from MD5 keys to function definitions. This map is only populated
// when building the Symtab from a Module.
std::vector<std::pair<uint64_t, Function *>> MD5FuncMap;
// A map from MD5 to the global variable. This map is only populated when
// building the symtab from a module. Use separate container instances for
// `MD5FuncMap` and `MD5VTableMap`.
// TODO: Unify the container type and the lambda function 'mapName' inside
// add{Func,VTable}WithName.
DenseMap<uint64_t, GlobalVariable *> MD5VTableMap;
// A map from function runtime address to function name MD5 hash.
// This map is only populated and used by raw instr profile reader.
AddrHashMap AddrToMD5Map;
Expand All @@ -488,12 +495,18 @@ class InstrProfSymtab {

// Add the function into the symbol table, by creating the following
// map entries:
// name-set = {PGOFuncName} + {getCanonicalName(PGOFuncName)} if the canonical
// name is different from pgo name
// name-set = {PGOFuncName} union {getCanonicalName(PGOFuncName)}
// - In MD5NameMap: <MD5Hash(name), name> for name in name-set
// - In MD5FuncMap: <MD5Hash(name), &F> for name in name-set
Error addFuncWithName(Function &F, StringRef PGOFuncName);

// Add the vtable into the symbol table, by creating the following
// map entries:
// name-set = {PGOName} union {getCanonicalName(PGOName)}
// - In MD5NameMap: <MD5Hash(name), name> for name in name-set
// - In MD5VTableMap: <MD5Hash(name), &V> for name in name-set
Error addVTableWithName(GlobalVariable &V, StringRef PGOVTableName);

// If the symtab is created by a series of calls to \c addFuncName, \c
// finalizeSymtab needs to be called before looking up function names.
// This is required because the underlying map is a vector (for space
Expand Down Expand Up @@ -555,6 +568,7 @@ class InstrProfSymtab {
Error create(const FuncNameIterRange &FuncIterRange,
const VTableNameIterRange &VTableIterRange);

// Map the MD5 of the symbol name to the name.
Error addSymbolName(StringRef SymbolName) {
if (SymbolName.empty())
return make_error<InstrProfError>(instrprof_error::malformed,
Expand Down Expand Up @@ -630,6 +644,10 @@ class InstrProfSymtab {
/// Return function from the name's md5 hash. Return nullptr if not found.
inline Function *getFunction(uint64_t FuncMD5Hash);

/// Return the global variable corresponding to md5 hash. Return nullptr if
/// not found.
inline GlobalVariable *getGlobalVariable(uint64_t MD5Hash);

/// Return the name section data.
inline StringRef getNameData() const { return Data; }

Expand Down Expand Up @@ -709,6 +727,12 @@ Function* InstrProfSymtab::getFunction(uint64_t FuncMD5Hash) {
return nullptr;
}

/// Look up the global variable registered in `MD5VTableMap` under \p MD5Hash.
///
/// \param MD5Hash key to search for in `MD5VTableMap`.
/// \returns the mapped global variable, or nullptr when the map holds no
/// entry for \p MD5Hash.
GlobalVariable *InstrProfSymtab::getGlobalVariable(uint64_t MD5Hash) {
  const auto Pos = MD5VTableMap.find(MD5Hash);
  // Guard clause: an unknown hash yields nullptr.
  if (Pos == MD5VTableMap.end())
    return nullptr;
  return Pos->second;
}

// To store the sums of profile count values, or the percentage of
// the sums of the total count values.
struct CountSumOrPercent {
Expand Down
32 changes: 32 additions & 0 deletions llvm/lib/ProfileData/InstrProf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -476,11 +476,43 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
return E;
}

SmallVector<MDNode *, 2> Types;
for (GlobalVariable &G : M.globals()) {
if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type))
continue;
if (Error E = addVTableWithName(
G, getIRPGOObjectName(G, InLTO, /* PGONameMetadata */ nullptr)))
return E;
}

Sorted = false;
finalizeSymtab();
return Error::success();
}

/// Register \p VTable in the symbol table under \p VTablePGOName and, when it
/// differs, under the name returned by getCanonicalName(VTablePGOName).
///
/// For each of those names, the name is passed to addSymbolName and an entry
/// <GlobalValue::getGUID(name), &VTable> is inserted into `MD5VTableMap`.
///
/// \param VTable the global variable to record.
/// \param VTablePGOName the PGO name to register the variable under.
/// \returns the error produced by addSymbolName if it fails for either name,
/// otherwise Error::success().
Error InstrProfSymtab::addVTableWithName(GlobalVariable &VTable,
                                         StringRef VTablePGOName) {
  // Records a single spelling of the vtable name.
  auto RegisterName = [&](StringRef SymName) -> Error {
    if (Error Err = addSymbolName(SymName))
      return Err;

    // try_emplace leaves an existing entry untouched; a GUID collision is
    // only reported in debug output and is not treated as an error.
    const bool IsNew =
        MD5VTableMap.try_emplace(GlobalValue::getGUID(SymName), &VTable).second;
    if (!IsNew)
      LLVM_DEBUG(dbgs() << "GUID conflict within one module");
    return Error::success();
  };

  if (Error Err = RegisterName(VTablePGOName))
    return Err;

  // Nothing more to do when the canonical spelling is the PGO name itself.
  StringRef Canonical = getCanonicalName(VTablePGOName);
  if (Canonical == VTablePGOName)
    return Error::success();

  return RegisterName(Canonical);
}

/// \c NameStrings is a string composed of one or more possibly encoded
/// sub-strings. The substrings are separated by 0 or more zero bytes. This
/// method decodes the string and calls `NameCallback` for each substring.
Expand Down
47 changes: 47 additions & 0 deletions llvm/unittests/ProfileData/InstrProfTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
Expand Down Expand Up @@ -1603,6 +1605,34 @@ TEST(SymtabTest, instr_prof_symtab_module_test) {
Function::Create(FTy, Function::WeakODRLinkage, "Wblah", M.get());
Function::Create(FTy, Function::WeakODRLinkage, "Wbar", M.get());

// [ptr, ptr, ptr]
ArrayType *VTableArrayType = ArrayType::get(
PointerType::get(Ctx, M->getDataLayout().getDefaultGlobalsAddressSpace()),
3);
Constant *Int32TyNull =
llvm::ConstantExpr::getNullValue(PointerType::getUnqual(Ctx));
SmallVector<llvm::Type *, 1> tys = {VTableArrayType};
StructType *VTableType = llvm::StructType::get(Ctx, tys);

// Create two vtables in the module, one with external linkage and the other
// with local linkage.
for (auto [Name, Linkage] :
{std::pair{"ExternalGV", GlobalValue::ExternalLinkage},
{"LocalGV", GlobalValue::InternalLinkage}}) {
llvm::Twine FuncName(Name, StringRef("VFunc"));
Function *VFunc = Function::Create(FTy, Linkage, FuncName, M.get());
GlobalVariable *GV = new llvm::GlobalVariable(
*M, VTableType, /* isConstant= */ true, Linkage,
llvm::ConstantStruct::get(
VTableType,
{llvm::ConstantArray::get(VTableArrayType,
{Int32TyNull, Int32TyNull, VFunc})}),
Name);
// Add type metadata for the test data, since vtables with type metadata
// are added to symtab.
GV->addTypeMetadata(16, MDString::get(Ctx, Name));
}

InstrProfSymtab ProfSymtab;
EXPECT_THAT_ERROR(ProfSymtab.create(*M), Succeeded());

Expand All @@ -1624,6 +1654,23 @@ TEST(SymtabTest, instr_prof_symtab_module_test) {
EXPECT_EQ(PGOName, PGOFuncName);
EXPECT_THAT(PGOFuncName.str(), EndsWith(Funcs[I].str()));
}

StringRef VTables[] = {"ExternalGV", "LocalGV"};
for (auto [VTableName, PGOName] : {std::pair{"ExternalGV", "ExternalGV"},
{"LocalGV", "MyModule.cpp;LocalGV"}}) {
GlobalVariable *GV =
M->getGlobalVariable(VTableName, /* AllowInternal=*/true);

// Test that ProfSymtab returns the expected name given a hash.
std::string IRPGOName = getPGOName(*GV);
EXPECT_STREQ(IRPGOName.c_str(), PGOName);
uint64_t GUID = IndexedInstrProf::ComputeHash(IRPGOName);
EXPECT_EQ(IRPGOName, ProfSymtab.getFuncOrVarName(GUID));
EXPECT_EQ(VTableName, getParsedIRPGOName(IRPGOName).second);

// Test that ProfSymtab returns the expected global variable
EXPECT_EQ(GV, ProfSymtab.getGlobalVariable(GUID));
}
}

// Testing symtab serialization and creator/deserialization interface
Expand Down