Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/include/llvm/Analysis/CtxProfAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class PGOContextualProfile {

/// Read-only access to the `Profiles` member; returns a reference, no copy.
const PGOCtxProfile &profiles() const { return Profiles; }

/// Whether this module should be treated as the specialized one.
/// Declared only: the definition is out of line in CtxProfAnalysis.cpp, where
/// the `-ctx-profile-force-is-specialized` flag can override the stored
/// `IsInSpecializedModule` value. Keeping an inline body here as well would
/// define the member twice.
bool isInSpecializedModule() const;

bool isFunctionKnown(const Function &F) const {
return getDefinedFunctionGUID(F) != 0;
Expand Down
5 changes: 3 additions & 2 deletions llvm/include/llvm/Analysis/ProfileSummaryInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,9 @@ class ProfileSummaryInfo {
ProfileSummaryInfo(const Module &M) : M(&M) { refresh(); }
ProfileSummaryInfo(ProfileSummaryInfo &&Arg) = default;

/// If a summary is provided as argument, use that. Otherwise,
/// if the `Summary` member is null, attempt to refresh.
///
/// \param Other optional summary to adopt. It defaults to null, so a plain
/// `refresh()` call still resolves to this single declaration; a separate
/// zero-argument overload would make such calls ambiguous.
void refresh(std::unique_ptr<ProfileSummary> &&Other = nullptr);

/// Returns true if profile summary is available.
bool hasProfileSummary() const { return Summary != nullptr; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,11 @@ namespace llvm {

/// Module pass parameterized by whether it runs before thin-link.
class PGOCtxProfFlatteningPass
    : public PassInfoMixin<PGOCtxProfFlatteningPass> {
  // Set once at construction. Being `const` with no default initializer, it
  // rules out a usable default constructor, so none is declared.
  const bool IsPreThinlink;

public:
  /// \param IsPreThinlink true for the pre-thinlink flavour of the pass.
  explicit PGOCtxProfFlatteningPass(bool IsPreThinlink)
      : IsPreThinlink(IsPreThinlink) {}

  /// Runs the pass over \p M using analyses obtained from \p MAM.
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
};
} // namespace llvm
Expand Down
38 changes: 28 additions & 10 deletions llvm/lib/Analysis/CtxProfAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ static cl::opt<CtxProfAnalysisPrinterPass::PrintMode> PrintLevel(
"just the yaml representation of the profile")),
cl::desc("Verbosity level of the contextual profile printer pass."));

static cl::opt<bool> ForceIsInSpecializedModule(
"ctx-profile-force-is-specialized", cl::init(false),
cl::desc("Treat the given module as-if it were containing the "
"post-thinlink module containing the root"));

const char *AssignGUIDPass::GUIDMetadataName = "guid";

PreservedAnalyses AssignGUIDPass::run(Module &M, ModuleAnalysisManager &MAM) {
Expand Down Expand Up @@ -278,6 +283,12 @@ void PGOContextualProfile::initIndex() {
});
}

// An explicit occurrence of the forcing flag on the command line takes
// precedence; otherwise report the value stored in the profile object.
bool PGOContextualProfile::isInSpecializedModule() const {
  const bool FlagGiven = ForceIsInSpecializedModule.getNumOccurrences() > 0;
  if (FlagGiven)
    return ForceIsInSpecializedModule;
  return IsInSpecializedModule;
}

void PGOContextualProfile::update(Visitor V, const Function &F) {
assert(isFunctionKnown(F));
GlobalValue::GUID G = getDefinedFunctionGUID(F);
Expand All @@ -299,20 +310,27 @@ void PGOContextualProfile::visit(ConstVisitor V, const Function *F) const {

// Builds a per-GUID flat profile by summing, element-wise, the counters of
// every context in `Profiles.Contexts`, the "unhandled" entries of each root,
// and the entries of `Profiles.FlatProfiles`.
const CtxProfFlatProfile PGOContextualProfile::flatten() const {
  CtxProfFlatProfile Flat;
  // Adds `From` into `Into` element by element. A still-empty `Into` (first
  // time a GUID is seen) is sized to match `From` before the addition.
  auto Accumulate = [](SmallVectorImpl<uint64_t> &Into,
                       const SmallVectorImpl<uint64_t> &From) {
    if (Into.empty())
      Into.resize(From.size());
    assert(Into.size() == From.size() &&
           "All contexts corresponding to a function should have the exact "
           "same number of counters.");
    for (size_t I = 0, E = Into.size(); I < E; ++I)
      Into[I] += From[I];
  };

  // Each visited context contributes exactly once. Doing a separate
  // insert-and-add before calling the helper would count repeated GUIDs twice.
  preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
                const PGOCtxProfContext>(
      Profiles.Contexts, [&](const PGOCtxProfContext &Ctx) {
        Accumulate(Flat[Ctx.guid()], Ctx.counters());
      });
  for (const auto &[_, RC] : Profiles.Contexts)
    for (const auto &[G, Unh] : RC.getUnhandled())
      Accumulate(Flat[G], Unh);
  for (const auto &[G, FC] : Profiles.FlatProfiles)
    Accumulate(Flat[G], FC);
  return Flat;
}

Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/Analysis/ProfileSummaryInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,11 @@ static cl::opt<double> PartialSampleProfileWorkingSetSizeScaleFactor(
// any backend passes (IR level instrumentation, for example). This method
// checks if the Summary is null and if so checks if the summary metadata is now
// available in the module and parses it to get the Summary object.
void ProfileSummaryInfo::refresh() {
void ProfileSummaryInfo::refresh(std::unique_ptr<ProfileSummary> &&Other) {
if (Other) {
Summary.swap(Other);
return;
}
if (hasProfileSummary())
return;
// First try to get context sensitive ProfileSummary.
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Passes/PassBuilderPipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1048,7 +1048,7 @@ PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
if (!UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPostLink) {
MPM.addPass(GlobalOptPass());
MPM.addPass(GlobalDCEPass());
MPM.addPass(PGOCtxProfFlatteningPass());
MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
}

MPM.addPass(createModuleToFunctionPassAdaptor(
Expand Down Expand Up @@ -1242,8 +1242,10 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
// mechanism for GUIDs.
MPM.addPass(AssignGUIDPass());
if (IsCtxProfUse)
if (IsCtxProfUse) {
MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
return MPM;
}
// Block further inlining in the instrumented ctxprof case. This avoids
// confusingly collecting profiles for the same GUID corresponding to
// different variants of the function. We could do like PGO and identify
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Passes/PassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@ MODULE_PASS("coro-early", CoroEarlyPass())
MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass())
MODULE_PASS("ctx-instr-gen",
PGOInstrumentationGen(PGOInstrumentationType::CTXPROF))
MODULE_PASS("ctx-prof-flatten", PGOCtxProfFlatteningPass())
MODULE_PASS("ctx-prof-flatten", PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false))
MODULE_PASS("ctx-prof-flatten-prethinlink",
PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true))
MODULE_PASS("noinline-nonprevailing", NoinlineNonPrevailing())
MODULE_PASS("deadargelim", DeadArgumentEliminationPass())
MODULE_PASS("debugify", NewPMDebugifyPass())
Expand Down
37 changes: 23 additions & 14 deletions llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,6 @@ class ProfileAnnotator final {
// To be accessed through getBBInfo() after construction.
std::map<const BasicBlock *, BBInfo> BBInfos;
std::vector<EdgeInfo> EdgeInfos;
InstrProfSummaryBuilder &PB;

// This is an adaptation of PGOUseFunc::populateCounters.
// FIXME(mtrofin): look into factoring the code to share one implementation.
Expand Down Expand Up @@ -284,9 +283,8 @@ class ProfileAnnotator final {
}

public:
ProfileAnnotator(Function &F, const SmallVectorImpl<uint64_t> &Counters,
InstrProfSummaryBuilder &PB)
: F(F), Counters(Counters), PB(PB) {
ProfileAnnotator(Function &F, const SmallVectorImpl<uint64_t> &Counters)
: F(F), Counters(Counters) {
assert(!F.isDeclaration());
assert(!Counters.empty());
size_t NrEdges = 0;
Expand Down Expand Up @@ -351,8 +349,6 @@ class ProfileAnnotator final {
(TotalCount > TrueCount ? TotalCount - TrueCount : 0U);
setProfMetadata(F.getParent(), SI, {TrueCount, FalseCount},
std::max(TrueCount, FalseCount));
PB.addInternalCount(TrueCount);
PB.addInternalCount(FalseCount);
}
}
}
Expand All @@ -364,7 +360,6 @@ class ProfileAnnotator final {
assert(!Counters.empty());
propagateCounterValues(Counters);
F.setEntryCount(Counters[0]);
PB.addEntryCount(Counters[0]);

for (auto &BB : F) {
const auto &BBInfo = getBBInfo(BB);
Expand All @@ -381,7 +376,6 @@ class ProfileAnnotator final {
if (EdgeCount > MaxCount)
MaxCount = EdgeCount;
EdgeCounts[SuccIdx] = EdgeCount;
PB.addInternalCount(EdgeCount);
}

if (MaxCount != 0)
Expand Down Expand Up @@ -431,16 +425,20 @@ PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
// e.g. synthetic weights, etc) because it wouldn't interfere with the
// contextual-based one (which would be in other modules)
auto OnExit = llvm::make_scope_exit([&]() {
if (IsPreThinlink)
return;
for (auto &F : M)
removeInstrumentation(F);
});
auto &CtxProf = MAM.getResult<CtxProfAnalysis>(M);
if (CtxProf.contexts().empty())
// post-thinlink, we only reprocess for the module(s) containing the
// contextual tree. For everything else, OnExit will just clean the
// instrumentation.
if (!IsPreThinlink && !CtxProf.isInSpecializedModule())
return PreservedAnalyses::none();

const auto FlattenedProfile = CtxProf.flatten();

InstrProfSummaryBuilder PB(ProfileSummaryBuilder::DefaultCutoffs);
for (auto &F : M) {
if (F.isDeclaration())
continue;
Expand All @@ -456,15 +454,26 @@ PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
if (It == FlattenedProfile.end())
clearColdFunctionProfile(F);
else {
ProfileAnnotator S(F, It->second, PB);
ProfileAnnotator S(F, It->second);
S.assignProfileData();
}
}

auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);
InstrProfSummaryBuilder PB(ProfileSummaryBuilder::DefaultCutoffs);
// Use the flat profiles here just so the importer doesn't complain about
// how different the PSIs are between the module with the roots and the
// various modules it imports.
for (auto &C : FlattenedProfile) {
PB.addEntryCount(C.second[0]);
for (auto V : llvm::drop_begin(C.second))
PB.addInternalCount(V);
}

M.setProfileSummary(PB.getSummary()->getMD(M.getContext()),
ProfileSummary::Kind::PSK_Instr);
PSI.refresh();
PreservedAnalyses PA;
PA.abandon<ProfileSummaryAnalysis>();
MAM.invalidate(M, PA);
auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);
PSI.refresh(PB.getSummary());
return PreservedAnalyses::none();
}
61 changes: 39 additions & 22 deletions llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
; RUN: split-file %s %t
; RUN: llvm-ctxprof-util fromYAML --input=%t/profile.yaml --output=%t/profile.ctxprofdata
; RUN: opt -module-summary -passes='thinlto-pre-link<O2>' -use-ctx-profile=%t/profile.ctxprofdata \
; RUN: %t/example.ll -S -o %t/prelink.ll
; RUN: FileCheck --input-file %t/prelink.ll %s --check-prefix=PRELINK
; RUN: opt -passes='ctx-prof-flatten' -use-ctx-profile=%t/profile.ctxprofdata %t/prelink.ll -S | FileCheck %s
; RUN: %t/example.ll -S -o %t/4909520559318251808.ll
; RUN: FileCheck --input-file %t/4909520559318251808.ll %s --check-prefix=PRELINK

; RUN: opt -passes='ctx-prof-flatten' -use-ctx-profile=%t/profile.ctxprofdata %t/4909520559318251808.ll -S | FileCheck %s --check-prefix=POSTLINK
;
;
; Check that instrumentation occurs where expected: the "no" block for both foo and
Expand All @@ -18,57 +19,73 @@
; PRELINK-NEXT: call void @llvm.instrprof.increment(ptr @foo, i64 [[#]], i32 2, i32 1)

; PRELINK-LABEL: @an_entrypoint
; PRELINK: br i1 %t, label %yes, label %common.ret, !prof ![[PREPROF:[0-9]+]]
; PRELINK-LABEL: yes:
; PRELINK-NEXT: call void @llvm.instrprof.increment(ptr @an_entrypoint, i64 [[#]], i32 2, i32 1)
; PRELINK-NOT: "ProfileSummary"
; PRELINK: ![[#]] = !{i32 1, !"ProfileSummary", !1}
; PRELINK: ![[#]] = !{!"TotalCount", i64 3595}
; PRELINK: ![[#]] = !{!"MaxCount", i64 3000}
; PRELINK: ![[#]] = !{!"MaxInternalCount", i64 3000}
; PRELINK: ![[#]] = !{!"MaxFunctionCount", i64 300}
; PRELINK: ![[#]] = !{!"NumCounts", i64 6}
; PRELINK: ![[#]] = !{!"NumFunctions", i64 3}
; PRELINK: ![[PREPROF]] = !{!"branch_weights", i32 40, i32 60}

; Check that the output has:
; - no instrumentation
; - the 2 functions have an entry count
; - each conditional branch has profile annotation
;
; CHECK-NOT: call void @llvm.instrprof
; POSTLINK-NOT: call void @llvm.instrprof
;
; make sure we have function entry counts, branch weights, and a profile summary.
; CHECK-LABEL: @foo
; CHECK-SAME: !prof ![[FOO_EP:[0-9]+]]
; CHECK: br i1 %t, label %yes, label %no, !prof ![[FOO_BW:[0-9]+]]
; CHECK-LABEL: @an_entrypoint
; CHECK-SAME: !prof ![[AN_ENTRYPOINT_EP:[0-9]+]]
; CHECK: br i1 %t, label %yes, label %common.ret, !prof ![[AN_ENTRYPOINT_BW:[0-9]+]]
; POSTLINK-LABEL: @foo
; POSTLINK-SAME: !prof ![[FOO_EP:[0-9]+]]
; POSTLINK: br i1 %t, label %yes, label %no, !prof ![[FOO_BW:[0-9]+]]
; POSTLINK-LABEL: @an_entrypoint
; POSTLINK-SAME: !prof ![[AN_ENTRYPOINT_EP:[0-9]+]]
; POSTLINK: br i1 %t, label %yes, label %common.ret, !prof ![[AN_ENTRYPOINT_BW:[0-9]+]]

; The postlink summary is restricted to the stuff under the root - including the
; "unhandled" data.
; POSTLINK: ![[#]] = !{i32 1, !"ProfileSummary", !1}
; POSTLINK: ![[#]] = !{!"TotalCount", i64 1495}
; POSTLINK: ![[#]] = !{!"MaxCount", i64 1000}
; POSTLINK: ![[#]] = !{!"MaxInternalCount", i64 1000}
; POSTLINK: ![[#]] = !{!"MaxFunctionCount", i64 200}
; POSTLINK: ![[#]] = !{!"NumCounts", i64 6}
; POSTLINK: ![[#]] = !{!"NumFunctions", i64 3}

; CHECK: ![[#]] = !{i32 1, !"ProfileSummary", !1}
; CHECK: ![[#]] = !{!"TotalCount", i64 480}
; CHECK: ![[#]] = !{!"MaxCount", i64 140}
; CHECK: ![[#]] = !{!"MaxInternalCount", i64 125}
; CHECK: ![[#]] = !{!"MaxFunctionCount", i64 140}
; CHECK: ![[#]] = !{!"NumCounts", i64 6}
; CHECK: ![[#]] = !{!"NumFunctions", i64 2}
;
; @foo will be called both unconditionally and conditionally, on the "yes" branch
; which has a count of 40. So 140 times.

; CHECK: ![[FOO_EP]] = !{!"function_entry_count", i64 140}
; POSTLINK: ![[FOO_EP]] = !{!"function_entry_count", i64 140}

; foo's "no" branch is taken 10+5 times (from the 2 contexts belonging to foo).
; Which means its "yes" branch is taken 140 - 15 times.

; CHECK: ![[FOO_BW]] = !{!"branch_weights", i32 125, i32 15}
; CHECK: ![[AN_ENTRYPOINT_EP]] = !{!"function_entry_count", i64 100}
; CHECK: ![[AN_ENTRYPOINT_BW]] = !{!"branch_weights", i32 40, i32 60}
; POSTLINK: ![[FOO_BW]] = !{!"branch_weights", i32 125, i32 15}
; POSTLINK: ![[AN_ENTRYPOINT_EP]] = !{!"function_entry_count", i64 100}
; POSTLINK: ![[AN_ENTRYPOINT_BW]] = !{!"branch_weights", i32 40, i32 60}

;--- profile.yaml
Contexts:
- Guid: 4909520559318251808
TotalRootEntryCount: 100
Unhandled:
- Guid: 1234
Counters: [200, 1000]
Counters: [100, 40]
Callsites: -
- Guid: 11872291593386833696
Counters: [ 100, 5 ]
-
- Guid: 11872291593386833696
Counters: [ 40, 10 ]
FlatProfiles:
- Guid: 1234
Counters: [ 100, 2000 ]
;--- example.ll
declare void @bar()

Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Analysis/CtxProfAnalysis/flatten-check-path.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
; RUN: llvm-ctxprof-util fromYAML --input=%t/profile_pump.yaml --output=%t/profile_pump.ctxprofdata
; RUN: llvm-ctxprof-util fromYAML --input=%t/profile_unreachable.yaml --output=%t/profile_unreachable.ctxprofdata
;
; RUN: opt -passes=ctx-prof-flatten %t/example_ok.ll -use-ctx-profile=%t/profile_ok.ctxprofdata -S -o - | FileCheck %s
; RUN: not --crash opt -passes=ctx-prof-flatten %t/message_pump.ll -use-ctx-profile=%t/profile_pump.ctxprofdata -S 2>&1 | FileCheck %s --check-prefix=ASSERTION
; RUN: not --crash opt -passes=ctx-prof-flatten %t/unreachable.ll -use-ctx-profile=%t/profile_unreachable.ctxprofdata -S 2>&1 | FileCheck %s --check-prefix=ASSERTION
; RUN: opt -passes=ctx-prof-flatten %t/example_ok.ll -ctx-profile-force-is-specialized -use-ctx-profile=%t/profile_ok.ctxprofdata -S -o - | FileCheck %s
; RUN: not --crash opt -passes=ctx-prof-flatten %t/message_pump.ll -ctx-profile-force-is-specialized -use-ctx-profile=%t/profile_pump.ctxprofdata -S 2>&1 | FileCheck %s --check-prefix=ASSERTION
; RUN: not --crash opt -passes=ctx-prof-flatten %t/unreachable.ll -ctx-profile-force-is-specialized -use-ctx-profile=%t/profile_unreachable.ctxprofdata -S 2>&1 | FileCheck %s --check-prefix=ASSERTION

; CHECK: br i1 %x, label %b1, label %exit, !prof ![[PROF1:[0-9]+]]
; CHECK: br i1 %y, label %blk, label %exit, !prof ![[PROF2:[0-9]+]]
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; Check that flattened profile lowering handles cold subgraphs that end in "unreachable"
; RUN: split-file %s %t
; RUN: llvm-ctxprof-util fromYAML --input=%t/profile.yaml --output=%t/profile.ctxprofdata
; RUN: opt -passes=ctx-prof-flatten %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s
; RUN: opt -passes=ctx-prof-flatten %t/1234.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s

; CHECK-LABEL: entry:
; CHECK: br i1 %t, label %yes, label %no, !prof ![[C1:[0-9]+]]
Expand All @@ -16,7 +16,7 @@
; CHECK-NOT: !prof
; CHECK: ![[C1]] = !{!"branch_weights", i32 6, i32 0}

;--- example.ll
;--- 1234.ll
define void @f1(i32 %cond) !guid !0 {
entry:
call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 0)
Expand Down
Loading
Loading