Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ThinLTO] Allow importing based on a workload definition #74545

Merged
merged 12 commits into from
Dec 14, 2023
233 changes: 223 additions & 10 deletions llvm/lib/Transforms/IPO/FunctionImport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/Internalize.h"
Expand Down Expand Up @@ -138,6 +139,9 @@ static cl::opt<bool>
ImportAllIndex("import-all-index",
cl::desc("Import all external functions in index."));

// Path of the workload definition file. When this option is non-empty,
// ModuleImportsManager::create returns the workload-driven imports manager,
// whose constructor reads the file and parses it as a JSON dictionary mapping
// a root function name to the list of function names to import.
static cl::opt<std::string> WorkloadDefinitions(
    "thinlto-workload-def",
    cl::desc("Pass a workload definition. This is a file containing a JSON "
             "dictionary. The keys are root functions, the values are lists "
             "of functions to import in the module defining the root."),
    cl::Hidden);

// Load lazily a module from \p FileName in \p Context.
static std::unique_ptr<Module> loadFile(const std::string &FileName,
LLVMContext &Context) {
Expand Down Expand Up @@ -369,29 +373,238 @@ class GlobalsImporter final {
}
};

static const char *getFailureName(FunctionImporter::ImportFailureReason Reason);

/// Determine the list of imports and exports for each module.
// NOTE(review): this span is a rendered diff, so both the removed and the
// added form of several lines (class header, `public:` label,
// computeImportForModule declaration) appear back to back.
class ModuleImportsManager final {
class ModuleImportsManager {
protected:
// Callback answering whether a given summary is the prevailing copy for a
// GUID.
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
IsPrevailing;
// The combined summary index the import decisions are computed from.
const ModuleSummaryIndex &Index;
// Optional per-module export sets; may be null (the constructor and create()
// default it to nullptr).
DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;

public:
// Stores the prevailing-symbol callback, the index and the optional export
// lists; performs no other work.
ModuleImportsManager(
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
IsPrevailing,
const ModuleSummaryIndex &Index,
DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists = nullptr)
: IsPrevailing(IsPrevailing), Index(Index), ExportLists(ExportLists) {}

public:
// Virtual so that instances handed out by create() can be destroyed through
// a pointer to this base class.
virtual ~ModuleImportsManager() = default;

/// Given the list of globals defined in a module, compute the list of imports
/// as well as the list of "exports", i.e. the list of symbols referenced from
/// another module (that may require promotion).
void computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
StringRef ModName,
FunctionImporter::ImportMapTy &ImportList);
virtual void
computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
StringRef ModName,
FunctionImporter::ImportMapTy &ImportList);

/// Factory returning the imports manager to use: the workload-based
/// implementation when the WorkloadDefinitions option is non-empty, this
/// class otherwise.
static std::unique_ptr<ModuleImportsManager>
create(function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
IsPrevailing,
const ModuleSummaryIndex &Index,
DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists =
nullptr);
};

class WorkloadImportsManager : public ModuleImportsManager {
mtrofin marked this conversation as resolved.
Show resolved Hide resolved
// Keep a module name -> defined value infos association. We use it to
mtrofin marked this conversation as resolved.
Show resolved Hide resolved
// determine if a module's import list should be done by the base
// ModuleImportsManager or by us.
StringMap<DenseSet<ValueInfo>> Workloads;

void
computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
StringRef ModName,
FunctionImporter::ImportMapTy &ImportList) override {
auto SetIter = Workloads.find(ModName);
if (SetIter == Workloads.end()) {
LLVM_DEBUG(dbgs() << "[Workload] " << ModName
<< " does not contain the root of any context.\n");
return ModuleImportsManager::computeImportForModule(DefinedGVSummaries,
ModName, ImportList);
}
LLVM_DEBUG(dbgs() << "[Workload] " << ModName
<< " contains the root(s) of context(s).\n");

GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
ExportLists);
auto &ValueInfos = SetIter->second;
SmallVector<EdgeInfo, 128> GlobWorklist;
for (auto &VI : llvm::make_early_inc_range(ValueInfos)) {
auto Candidates =
qualifyCalleeCandidates(Index, VI.getSummaryList(), ModName);

const GlobalValueSummary *GVS = nullptr;
FunctionImporter::ImportFailureReason LastReason =
FunctionImporter::ImportFailureReason::None;
for (const auto &Candidate : Candidates) {
mtrofin marked this conversation as resolved.
Show resolved Hide resolved
LastReason = Candidate.first;
if (Candidate.first == FunctionImporter::ImportFailureReason::None) {
const bool Prevailing = IsPrevailing(VI.getGUID(), Candidate.second);
if (Prevailing || !GVS) {
if (!GVS && !Prevailing)
LLVM_DEBUG(dbgs()
<< "[Workload] Considering " << VI.name() << " from "
<< Candidate.second->modulePath() << " with linkage "
<< Candidate.second->linkage()
<< " although it's not prevailing, but it's the "
"first available candidate.\n");
GVS = Candidate.second;
if (Prevailing) {
LLVM_DEBUG(dbgs()
<< "[Workload] Considering " << VI.name() << " from "
<< GVS->modulePath() << " with linkage "
<< GVS->linkage() << " because it's prevailing.\n");
break;
}
} else {
LLVM_DEBUG(dbgs() << "[Workload] Skipping " << VI.name() << " from "
<< Candidate.second->modulePath()
<< " with linkage " << Candidate.second->linkage()
<< " because it's not prevailing\n");
}
}
}
if (!GVS) {
LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
<< " because can't select Callee. Guid is: "
mtrofin marked this conversation as resolved.
Show resolved Hide resolved
<< Function::getGUID(VI.name())
<< ". The reason was: " << getFailureName(LastReason)
<< "\n");
continue;
}
const auto *CFS = cast<FunctionSummary>(GVS->getBaseObject());
auto ExportingModule = CFS->modulePath();
if (ExportingModule == ModName) {
mtrofin marked this conversation as resolved.
Show resolved Hide resolved
LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
<< " because its defining module is the same as the "
"current module\n");
continue;
}
if (!shouldImport(DefinedGVSummaries, VI.getGUID(), CFS)) {
LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
<< " because we have a local copy.\n");
mtrofin marked this conversation as resolved.
Show resolved Hide resolved
continue;
}

LLVM_DEBUG(dbgs() << "[Workload][Including]" << VI.name() << " from "
<< ExportingModule << " : "
<< Function::getGUID(VI.name()) << "\n");
ImportList[ExportingModule].insert(VI.getGUID());
GVI.onImportingSummary(*GVS);
if (ExportLists)
(*ExportLists)[ExportingModule].insert(VI);
}
LLVM_DEBUG(dbgs() << "[Workload] Done\n");
}

// Decide whether the function identified by Guid should be imported from
// Candidate's module, given the summaries of the globals already defined in
// the destination module (DefinedGVSummaries).
//
// Returns true when the destination module has no definition for Guid, or
// when its local definition is non-prevailing with interposable linkage while
// Candidate is prevailing. Returns false otherwise, i.e. the local copy is
// kept.
bool shouldImport(const GVSummaryMapTy &DefinedGVSummaries,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe change shouldImport to a name that is more descriptive of what it is checking, e.g. something like moduleAlreadyHasPreferredDef (with a flipped return value)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, depending on how much you care about speed, you could completely skip the search for a candidate in the case where the module already contains either a prevailing or a non-interposable copy.

Function::GUID Guid, const GlobalValueSummary *Candidate) {
mtrofin marked this conversation as resolved.
Show resolved Hide resolved
// No local definition for this GUID: importing is always acceptable.
auto DefinedSummary = DefinedGVSummaries.find(Guid);
if (DefinedSummary == DefinedGVSummaries.end())
return true;

// See shouldImportGlobal for the justification of the isInterposableLinkage.
mtrofin marked this conversation as resolved.
Show resolved Hide resolved
// All three conditions must hold to prefer the candidate over the local copy.
if (!IsPrevailing(Guid, DefinedSummary->second) &&
GlobalValue::isInterposableLinkage(DefinedSummary->second->linkage()) &&
IsPrevailing(Guid, Candidate)) {
LLVM_DEBUG(dbgs() << "[Workload] " << Guid
<< ": local non-prevailing in module. Importing from "
mtrofin marked this conversation as resolved.
Show resolved Hide resolved
<< Candidate->modulePath() << "\n");
return true;
}
LLVM_DEBUG(dbgs() << "[Workload] " << Guid
<< ": ignored! Target already in destination module.\n");
return false;
}

public:
// Builds the Workloads map from the file named by the WorkloadDefinitions
// option.
//
// The file is parsed as JSON into a map from a root function name to a list
// of callee names. For each root that is found in the index and has exactly
// one summary, the callees that can be resolved by name are added to the set
// associated with the module path of the root's summary. Roots and callees
// that cannot be resolved are skipped (reported only through LLVM_DEBUG).
// Failure to read or parse the file is reported through report_fatal_error.
WorkloadImportsManager(
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
IsPrevailing,
const ModuleSummaryIndex &Index,
DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
: ModuleImportsManager(IsPrevailing, Index, ExportLists) {
// Name -> ValueInfo lookup over every entry of the index, used to resolve the
// names listed in the workload file. If two index entries report the same
// name, the later assignment overwrites the earlier one.
StringMap<ValueInfo> CtxGuidToValueInfo;
mtrofin marked this conversation as resolved.
Show resolved Hide resolved
for (auto &I : Index) {
ValueInfo VI(Index.haveGVs(), &I);
mtrofin marked this conversation as resolved.
Show resolved Hide resolved
CtxGuidToValueInfo[VI.name()] = VI;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does this work for internal linkage functions? Are you relying on the user having specified -funique-internal-linkage-names? Ah, I see you cover this in the PR description, would be good to add a comment here and where you describe the format of the input file (which I suggest earlier at the option definition). Also, make sure the fallback handling makes sense in the cases where the option was not specified (e.g. should there be an error if the entry already exists in this map for the name?). Also, include an internal linkage function in test case.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added the comments. For test, @m1_f2 is internal and imported by the second module.

Copy link
Member Author

@mtrofin mtrofin Dec 10, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also reporting (LLVM_DEBUG) ambiguous names, if the workfload def references such things.

}
// NOTE(review): this EC is never read; the if-condition below declares its
// own EC that shadows it. Candidate for removal.
std::error_code EC;
auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(WorkloadDefinitions);
if (std::error_code EC = BufferOrErr.getError()) {
// NOTE(review): the message carries neither the file name nor EC's text, and
// the `return` is presumably unreachable if report_fatal_error does not
// return -- confirm.
report_fatal_error("Failed to open context file");
return;
}
auto Buffer = std::move(BufferOrErr.get());
// Expected shape of the file: { "<root name>": ["<callee name>", ...], ... }.
std::map<std::string, std::vector<std::string>> WorkloadDefs;
json::Path::Root NullRoot;
auto Parsed = json::parse(Buffer->getBuffer());
mtrofin marked this conversation as resolved.
Show resolved Hide resolved
if (!Parsed)
report_fatal_error(Parsed.takeError());
if (!json::fromJSON(*Parsed, WorkloadDefs, NullRoot))
report_fatal_error("Invalid thinlto contextual profile format.");
for (const auto &Workload : WorkloadDefs) {
const auto &Root = Workload.first;
LLVM_DEBUG(dbgs() << "[Workload] Root: " << Root << "\n");
const auto &AllCallees = Workload.second;
// A root that is not present in the index is skipped.
auto RootIt = CtxGuidToValueInfo.find(Root);
if (RootIt == CtxGuidToValueInfo.end()) {
LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
<< " not found in this linkage unit.\n");
continue;
}
auto RootVI = RootIt->second;
// The defining module is taken from the root's only summary, so a root with
// zero or several summaries is skipped.
if (RootVI.getSummaryList().size() != 1) {
LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
<< " should have exactly one summary, but has "
<< RootVI.getSummaryList().size() << ". Skipping.\n");
continue;
}
StringRef RootDefiningModule =
RootVI.getSummaryList().front()->modulePath();
LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << Root
<< " is : " << RootDefiningModule << "\n");
// Roots defined in the same module share one set, so their callees are
// merged.
auto &Set = Workloads[RootDefiningModule];
for (const auto &Callee : AllCallees) {
LLVM_DEBUG(dbgs() << "[Workload] " << Callee << "\n");
auto ElemIt = CtxGuidToValueInfo.find(Callee);
if (ElemIt == CtxGuidToValueInfo.end()) {
LLVM_DEBUG(dbgs() << "[Workload] " << Callee << " not found\n");
continue;
}
Set.insert(ElemIt->second);
}
// Because the set is shared per module, the count printed below may include
// callees contributed by other roots defined in the same module.
LLVM_DEBUG(dbgs() << "[Workload] Root: " << Root << " we have "
<< Set.size() << " distinct callees.\n");
LLVM_DEBUG( //
for (const auto &VI
: Set) {
mtrofin marked this conversation as resolved.
Show resolved Hide resolved
dbgs() << "[Workload] Root: " << Root
<< " Would include: " << VI.getGUID() << "\n";
});
}
}
};

/// Factory for the imports manager: returns a WorkloadImportsManager when a
/// workload definition was supplied through the WorkloadDefinitions option,
/// and a plain ModuleImportsManager otherwise.
std::unique_ptr<ModuleImportsManager> ModuleImportsManager::create(
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        IsPrevailing,
    const ModuleSummaryIndex &Index,
    DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists) {
  const bool HasWorkloadDefinition = !WorkloadDefinitions.empty();
  if (HasWorkloadDefinition) {
    LLVM_DEBUG(dbgs() << "[Workload] Using the contextual imports manager.\n");
    return std::make_unique<WorkloadImportsManager>(IsPrevailing, Index,
                                                    ExportLists);
  }

  LLVM_DEBUG(dbgs() << "[Workload] Using the regular imports manager.\n");
  // NOTE(review): the base object is created with `new` instead of
  // std::make_unique, presumably because the base-class constructor is not
  // accessible outside the class hierarchy -- confirm before changing.
  return std::unique_ptr<ModuleImportsManager>(
      new ModuleImportsManager(IsPrevailing, Index, ExportLists));
}

static const char *
getFailureName(FunctionImporter::ImportFailureReason Reason) {
switch (Reason) {
Expand Down Expand Up @@ -732,14 +945,14 @@ void llvm::ComputeCrossModuleImport(
isPrevailing,
DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
ModuleImportsManager MIS(isPrevailing, Index, &ExportLists);
auto MIS = ModuleImportsManager::create(isPrevailing, Index, &ExportLists);
// For each module that has function defined, compute the import/export lists.
for (const auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
auto &ImportList = ImportLists[DefinedGVSummaries.first];
LLVM_DEBUG(dbgs() << "Computing import for Module '"
<< DefinedGVSummaries.first << "'\n");
MIS.computeImportForModule(DefinedGVSummaries.second,
DefinedGVSummaries.first, ImportList);
MIS->computeImportForModule(DefinedGVSummaries.second,
DefinedGVSummaries.first, ImportList);
}

// When computing imports we only added the variables and functions being
Expand Down Expand Up @@ -855,8 +1068,8 @@ static void ComputeCrossModuleImportForModuleForTest(

// Compute the import list for this module.
LLVM_DEBUG(dbgs() << "Computing import for Module '" << ModulePath << "'\n");
ModuleImportsManager MIS(isPrevailing, Index);
MIS.computeImportForModule(FunctionSummaryMap, ModulePath, ImportList);
auto MIS = ModuleImportsManager::create(isPrevailing, Index);
MIS->computeImportForModule(FunctionSummaryMap, ModulePath, ImportList);

#ifndef NDEBUG
dumpImportListForModule(Index, ModulePath, ImportList);
Expand Down
25 changes: 25 additions & 0 deletions llvm/test/ThinLTO/X86/Inputs/workload1.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"

declare void @m1_variant()

; dso_local function of this module; it calls the internal @m1_f2 and
; @noninterposable_f.
define dso_local void @m1_f1() {
call void @m1_f2()
call void @noninterposable_f()
ret void
}

; Internal-linkage function; its only call is to @interposable_f.
define internal void @m1_f2() {
call void @interposable_f()
ret void
}

; linkonce copy of @interposable_f. It calls @m1_variant, which is only
; declared in this file.
define linkonce void @interposable_f() {
call void @m1_variant()
ret void
}

; linkonce_odr copy of @noninterposable_f. It calls @m1_variant, which is only
; declared in this file.
define linkonce_odr void @noninterposable_f() {
call void @m1_variant()
ret void
}
22 changes: 22 additions & 0 deletions llvm/test/ThinLTO/X86/Inputs/workload2.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"

declare void @m2_variant()

; dso_local function of this module; it calls @interposable_f and
; @noninterposable_f.
define dso_local void @m2_f1() {
call void @interposable_f()
call void @noninterposable_f()
ret void
}

@m2_f1_alias = alias void (...), ptr @m2_f1

; This module's copy of @interposable_f. Note that it is declared
; linkonce_odr here, whereas the copy in workload1.ll is linkonce. It calls
; @m2_variant, which is only declared in this file.
define linkonce_odr void @interposable_f() {
mtrofin marked this conversation as resolved.
Show resolved Hide resolved
call void @m2_variant()
ret void
}

; linkonce_odr copy of @noninterposable_f. It calls @m2_variant, which is only
; declared in this file.
define linkonce_odr void @noninterposable_f() {
call void @m2_variant()
ret void
}
9 changes: 9 additions & 0 deletions llvm/test/ThinLTO/X86/Inputs/workload3.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"

declare void @m1_f1()

; dso_local function whose only call is to @m1_f1, which this file declares
; but does not define.
define dso_local void @m3_f1() {
call void @m1_f1()
ret void
}
Loading