Skip to content

Commit ada9da7

Browse files
authored
[MachineOutliner] Add profile guided outlining (#154437)
1 parent 3ce1656 commit ada9da7

File tree

8 files changed

+215
-56
lines changed

8 files changed

+215
-56
lines changed

llvm/docs/ReleaseNotes.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,10 @@ Changes to TableGen
7575
Changes to Interprocedural Optimizations
7676
----------------------------------------
7777

78+
* Added `-enable-machine-outliner={optimistic-pgo,conservative-pgo}` to read
79+
profile data to guide the machine outliner
80+
([#154437](https://github.com/llvm/llvm-project/pull/154437)).
81+
7882
Changes to Vectorizers
7983
----------------------------------------
8084

llvm/include/llvm/CodeGen/Passes.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class ModulePass;
3131
class Pass;
3232
class TargetMachine;
3333
class raw_ostream;
34+
enum class RunOutliner;
3435

3536
template <typename T> class IntrusiveRefCntPtr;
3637
namespace vfs {
@@ -520,7 +521,7 @@ LLVM_ABI ModulePass *createGlobalMergeFuncPass();
520521

521522
/// This pass performs outlining on machine instructions directly before
522523
/// printing assembly.
523-
LLVM_ABI ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions = true);
524+
LLVM_ABI ModulePass *createMachineOutlinerPass(RunOutliner RunOutlinerMode);
524525

525526
/// This pass expands the reduction intrinsics into sequences of shuffles.
526527
LLVM_ABI FunctionPass *createExpandReductionsPass();

llvm/include/llvm/Passes/CodeGenPassBuilder.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1095,11 +1095,9 @@ Error CodeGenPassBuilder<Derived, TargetMachineT>::addMachinePasses(
10951095
if (TM.Options.EnableMachineOutliner &&
10961096
getOptLevel() != CodeGenOptLevel::None &&
10971097
Opt.EnableMachineOutliner != RunOutliner::NeverOutline) {
1098-
bool RunOnAllFunctions =
1099-
(Opt.EnableMachineOutliner == RunOutliner::AlwaysOutline);
1100-
bool AddOutliner = RunOnAllFunctions || TM.Options.SupportsDefaultOutlining;
1101-
if (AddOutliner)
1102-
addPass(MachineOutlinerPass(RunOnAllFunctions));
1098+
if (Opt.EnableMachineOutliner != RunOutliner::TargetDefault ||
1099+
TM.Options.SupportsDefaultOutlining)
1100+
addPass(MachineOutlinerPass(Opt.EnableMachineOutliner));
11031101
}
11041102

11051103
addPass(StackFrameLayoutAnalysisPass());

llvm/include/llvm/Target/CGPassBuilderOption.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,13 @@
2121

2222
namespace llvm {
2323

24-
enum class RunOutliner { TargetDefault, AlwaysOutline, NeverOutline };
24+
enum class RunOutliner {
25+
TargetDefault,
26+
AlwaysOutline,
27+
OptimisticPGO,
28+
ConservativePGO,
29+
NeverOutline
30+
};
2531
enum class RegAllocType { Unset, Default, Basic, Fast, Greedy, PBQP };
2632

2733
class RegAllocTypeParser : public cl::parser<RegAllocType> {

llvm/lib/CodeGen/MachineOutliner.cpp

Lines changed: 86 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,10 @@
5959
#include "llvm/ADT/SmallSet.h"
6060
#include "llvm/ADT/Statistic.h"
6161
#include "llvm/ADT/Twine.h"
62+
#include "llvm/Analysis/BlockFrequencyInfo.h"
6263
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
6364
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
65+
#include "llvm/Analysis/ProfileSummaryInfo.h"
6466
#include "llvm/CGData/CodeGenDataReader.h"
6567
#include "llvm/CodeGen/LivePhysRegs.h"
6668
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -107,6 +109,16 @@ STATISTIC(StableHashAttempts,
107109
STATISTIC(StableHashDropped,
108110
"Count of unsuccessful hashing attempts for outlined functions");
109111
STATISTIC(NumRemovedLOHs, "Total number of Linker Optimization Hints removed");
112+
STATISTIC(NumPGOBlockedOutlined,
113+
"Number of times outlining was blocked by PGO");
114+
STATISTIC(NumPGOAllowedCold,
115+
"Number of times outlining was allowed from cold functions");
116+
STATISTIC(NumPGOConservativeBlockedOutlined,
117+
"Number of times outlining was blocked conservatively when profile "
118+
"counts were missing");
119+
STATISTIC(NumPGOOptimisticOutlined,
120+
"Number of times outlining was allowed optimistically when profile "
121+
"counts were missing");
110122

111123
// Set to true if the user wants the outliner to run on linkonceodr linkage
112124
// functions. This is false by default because the linker can dedupe linkonceodr
@@ -438,11 +450,10 @@ struct MachineOutliner : public ModulePass {
438450
/// The current repeat number of machine outlining.
439451
unsigned OutlineRepeatedNum = 0;
440452

441-
/// Set to true if the outliner should run on all functions in the module
442-
/// considered safe for outlining.
443-
/// Set to true by default for compatibility with llc's -run-pass option.
444-
/// Set when the pass is constructed in TargetPassConfig.
445-
bool RunOnAllFunctions = true;
453+
/// The mode that selects which code the outliner is allowed to outline from.
454+
/// Set to always-outline by default for compatibility with llc's -run-pass
455+
/// option.
456+
RunOutliner RunOutlinerMode = RunOutliner::AlwaysOutline;
446457

447458
/// This is a compact representation of hash sequences of outlined functions.
448459
/// It is used when OutlinerMode = CGDataMode::Write.
@@ -468,6 +479,11 @@ struct MachineOutliner : public ModulePass {
468479
AU.addRequired<TargetPassConfig>();
469480
AU.addPreserved<MachineModuleInfoWrapperPass>();
470481
AU.addUsedIfAvailable<ImmutableModuleSummaryIndexWrapperPass>();
482+
if (RunOutlinerMode == RunOutliner::OptimisticPGO ||
483+
RunOutlinerMode == RunOutliner::ConservativePGO) {
484+
AU.addRequired<BlockFrequencyInfoWrapperPass>();
485+
AU.addRequired<ProfileSummaryInfoWrapperPass>();
486+
}
471487
AU.setPreservesAll();
472488
ModulePass::getAnalysisUsage(AU);
473489
}
@@ -578,9 +594,9 @@ struct MachineOutliner : public ModulePass {
578594
char MachineOutliner::ID = 0;
579595

580596
namespace llvm {
581-
ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions) {
597+
ModulePass *createMachineOutlinerPass(RunOutliner RunOutlinerMode) {
582598
MachineOutliner *OL = new MachineOutliner();
583-
OL->RunOnAllFunctions = RunOnAllFunctions;
599+
OL->RunOutlinerMode = RunOutlinerMode;
584600
return OL;
585601
}
586602

@@ -1198,10 +1214,49 @@ bool MachineOutliner::outline(
11981214
return OutlinedSomething;
11991215
}
12001216

1217+
/// Decide whether \p MBB may be mapped for outlining under the profile-guided
/// outlining modes.
///
/// \param RunOutlinerMode The active outliner mode. Any mode other than
///        OptimisticPGO or ConservativePGO places no restriction here.
/// \param PSI Profile summary used for the cold-count threshold; may be null.
/// \param BFI Block frequency info for the function containing \p MBB; may be
///        null, in which case no block count is looked up.
/// \param MBB The machine basic block being considered.
/// \returns true if the block may be outlined from, false if it is blocked.
///
/// Blocks in functions carrying the Cold attribute are always allowed. When a
/// profile count is available for the block, it is allowed only if the count
/// does not exceed the cold-count threshold reported by \p PSI. When no count
/// is available, OptimisticPGO defers to the target's default outlining
/// preference for the function and ConservativePGO blocks the block.
static bool allowPGOOutlining(RunOutliner RunOutlinerMode,
1218+
const ProfileSummaryInfo *PSI,
1219+
const BlockFrequencyInfo *BFI,
1220+
MachineBasicBlock &MBB) {
1221+
// Non-PGO modes are not filtered by profile data.
if (RunOutlinerMode != RunOutliner::OptimisticPGO &&
1222+
RunOutlinerMode != RunOutliner::ConservativePGO)
1223+
return true;
1224+
auto *MF = MBB.getParent();
1225+
// A function explicitly marked cold is allowed without consulting counts.
if (MF->getFunction().hasFnAttribute(Attribute::Cold)) {
1226+
++NumPGOAllowedCold;
1227+
return true;
1228+
}
1229+
1230+
auto *BB = MBB.getBasicBlock();
1231+
// Only trust the profile when the IR block, the summary, and the block
// frequencies are all present and a count is actually reported.
if (BB && PSI && BFI)
1232+
if (auto Count = BFI->getBlockProfileCount(BB))
1233+
return *Count <= PSI->getOrCompColdCountThreshold();
1234+
1235+
// No usable count from here on: the two PGO modes differ only in this
// fallback.
if (RunOutlinerMode == RunOutliner::OptimisticPGO) {
1236+
auto *TII = MF->getSubtarget().getInstrInfo();
1237+
if (TII->shouldOutlineFromFunctionByDefault(*MF)) {
1238+
// Profile data is unavailable, but we optimistically allow outlining
1239+
++NumPGOOptimisticOutlined;
1240+
return true;
1241+
}
1242+
return false;
1243+
}
1244+
assert(RunOutlinerMode == RunOutliner::ConservativePGO);
1245+
// Profile data is unavailable, so we conservatively block outlining
1246+
++NumPGOConservativeBlockedOutlined;
1247+
return false;
1248+
}
1249+
12011250
void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M) {
12021251
// Build instruction mappings for each function in the module. Start by
12031252
// iterating over each Function in M.
12041253
LLVM_DEBUG(dbgs() << "*** Populating mapper ***\n");
1254+
bool EnableProfileGuidedOutlining =
1255+
RunOutlinerMode == RunOutliner::OptimisticPGO ||
1256+
RunOutlinerMode == RunOutliner::ConservativePGO;
1257+
ProfileSummaryInfo *PSI = nullptr;
1258+
if (EnableProfileGuidedOutlining)
1259+
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
12051260
for (Function &F : M) {
12061261
LLVM_DEBUG(dbgs() << "MAPPING FUNCTION: " << F.getName() << "\n");
12071262

@@ -1222,7 +1277,11 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M) {
12221277
}
12231278

12241279
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1225-
if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF)) {
1280+
BlockFrequencyInfo *BFI = nullptr;
1281+
if (EnableProfileGuidedOutlining && F.hasProfileData())
1282+
BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
1283+
if (RunOutlinerMode == RunOutliner::TargetDefault &&
1284+
!TII->shouldOutlineFromFunctionByDefault(*MF)) {
12261285
LLVM_DEBUG(dbgs() << "SKIP: Target does not want to outline from "
12271286
"function by default\n");
12281287
continue;
@@ -1262,6 +1321,11 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M) {
12621321
continue;
12631322
}
12641323

1324+
if (!allowPGOOutlining(RunOutlinerMode, PSI, BFI, MBB)) {
1325+
++NumPGOBlockedOutlined;
1326+
continue;
1327+
}
1328+
12651329
// MBB is suitable for outlining. Map it to a list of unsigneds.
12661330
Mapper.convertToUnsignedVec(MBB, *TII);
12671331
}
@@ -1434,10 +1498,22 @@ bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) {
14341498
// the user how the outliner is running.
14351499
LLVM_DEBUG({
14361500
dbgs() << "Machine Outliner: Running on ";
1437-
if (RunOnAllFunctions)
1501+
switch (RunOutlinerMode) {
1502+
case RunOutliner::AlwaysOutline:
14381503
dbgs() << "all functions";
1439-
else
1504+
break;
1505+
case RunOutliner::OptimisticPGO:
1506+
dbgs() << "optimistically cold functions";
1507+
break;
1508+
case RunOutliner::ConservativePGO:
1509+
dbgs() << "conservatively cold functions";
1510+
break;
1511+
case RunOutliner::TargetDefault:
14401512
dbgs() << "target-default functions";
1513+
break;
1514+
case RunOutliner::NeverOutline:
1515+
llvm_unreachable("should not outline");
1516+
}
14411517
dbgs() << "\n";
14421518
});
14431519

llvm/lib/CodeGen/TargetPassConfig.cpp

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -134,12 +134,18 @@ static cl::opt<cl::boolOrDefault> DebugifyCheckAndStripAll(
134134
static cl::opt<RunOutliner> EnableMachineOutliner(
135135
"enable-machine-outliner", cl::desc("Enable the machine outliner"),
136136
cl::Hidden, cl::ValueOptional, cl::init(RunOutliner::TargetDefault),
137-
cl::values(clEnumValN(RunOutliner::AlwaysOutline, "always",
138-
"Run on all functions guaranteed to be beneficial"),
139-
clEnumValN(RunOutliner::NeverOutline, "never",
140-
"Disable all outlining"),
141-
// Sentinel value for unspecified option.
142-
clEnumValN(RunOutliner::AlwaysOutline, "", "")));
137+
cl::values(
138+
clEnumValN(RunOutliner::AlwaysOutline, "always",
139+
"Run on all functions guaranteed to be beneficial"),
140+
clEnumValN(RunOutliner::OptimisticPGO, "optimistic-pgo",
141+
"Outline cold code only. If a code block does not have "
142+
"profile data, optimistically assume it is cold."),
143+
clEnumValN(RunOutliner::ConservativePGO, "conservative-pgo",
144+
"Outline cold code only. If a code block does not have "
145+
"profile data, conservatively assume it is hot."),
146+
clEnumValN(RunOutliner::NeverOutline, "never", "Disable all outlining"),
147+
// Sentinel value for unspecified option.
148+
clEnumValN(RunOutliner::AlwaysOutline, "", "")));
143149
static cl::opt<bool> EnableGlobalMergeFunc(
144150
"enable-global-merge-func", cl::Hidden,
145151
cl::desc("Enable global merge functions that are based on hash function"));
@@ -1224,12 +1230,9 @@ void TargetPassConfig::addMachinePasses() {
12241230
if (TM->Options.EnableMachineOutliner &&
12251231
getOptLevel() != CodeGenOptLevel::None &&
12261232
EnableMachineOutliner != RunOutliner::NeverOutline) {
1227-
bool RunOnAllFunctions =
1228-
(EnableMachineOutliner == RunOutliner::AlwaysOutline);
1229-
bool AddOutliner =
1230-
RunOnAllFunctions || TM->Options.SupportsDefaultOutlining;
1231-
if (AddOutliner)
1232-
addPass(createMachineOutlinerPass(RunOnAllFunctions));
1233+
if (EnableMachineOutliner != RunOutliner::TargetDefault ||
1234+
TM->Options.SupportsDefaultOutlining)
1235+
addPass(createMachineOutlinerPass(EnableMachineOutliner));
12331236
}
12341237

12351238
if (GCEmptyBlocks)
Lines changed: 15 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,37 @@
11
; REQUIRES: asserts
2-
; RUN: llc %s -debug-pass=Structure -verify-machineinstrs \
3-
; RUN: --debug-only=machine-outliner -enable-machine-outliner=always \
4-
; RUN: -mtriple arm64---- -o /dev/null 2>&1 \
5-
; RUN: | FileCheck %s -check-prefix=ALWAYS
2+
; RUN: llc %s -debug-pass=Structure -verify-machineinstrs --debug-only=machine-outliner -enable-machine-outliner=always -mtriple arm64---- -o /dev/null 2>&1 | FileCheck %s -check-prefixes=CHECK,ALWAYS
3+
; RUN: llc %s -debug-pass=Structure -verify-machineinstrs --debug-only=machine-outliner -enable-machine-outliner -mtriple arm64---- -o /dev/null 2>&1 | FileCheck %s -check-prefixes=CHECK,ALWAYS
64

7-
; RUN: llc %s -debug-pass=Structure -verify-machineinstrs \
8-
; RUN: --debug-only=machine-outliner -enable-machine-outliner \
9-
; RUN: -mtriple arm64---- -o /dev/null 2>&1 \
10-
; RUN: | FileCheck %s -check-prefix=ENABLE
5+
; RUN: llc %s -debug-pass=Structure -verify-machineinstrs --debug-only=machine-outliner -mtriple arm64---- -o /dev/null 2>&1 | FileCheck %s -check-prefixes=CHECK,TARGET-DEFAULT
116

12-
; RUN: llc %s -debug-pass=Structure -verify-machineinstrs \
13-
; RUN: -enable-machine-outliner=never -mtriple arm64---- -o /dev/null 2>&1 \
14-
; RUN: | FileCheck %s -check-prefix=NEVER
7+
; RUN: llc %s -debug-pass=Structure -verify-machineinstrs --debug-only=machine-outliner -enable-machine-outliner=optimistic-pgo -mtriple arm64---- -o /dev/null 2>&1 | FileCheck %s -check-prefixes=CHECK,OPTIMISTIC
158

16-
; RUN: llc %s -debug-pass=Structure -verify-machineinstrs \
17-
; RUN: --debug-only=machine-outliner -mtriple arm64---- -o /dev/null 2>&1 \
18-
; RUN: | FileCheck %s -check-prefix=NOT-ADDED
9+
; RUN: llc %s -debug-pass=Structure -verify-machineinstrs --debug-only=machine-outliner -enable-machine-outliner=conservative-pgo -mtriple arm64---- -o /dev/null 2>&1 | FileCheck %s -check-prefixes=CHECK,CONSERVATIVE
1910

20-
; RUN: llc %s -O=0 -debug-pass=Structure -verify-machineinstrs \
21-
; RUN: -mtriple arm64---- -o /dev/null 2>&1 \
22-
; RUN: | FileCheck %s -check-prefix=OPTNONE
11+
; RUN: llc %s -debug-pass=Structure -verify-machineinstrs --debug-only=machine-outliner -enable-machine-outliner=never -mtriple arm64---- -o /dev/null 2>&1 | FileCheck %s -check-prefix=DISABLED
12+
; RUN: llc %s -debug-pass=Structure -verify-machineinstrs --debug-only=machine-outliner -O=0 -mtriple arm64---- -o /dev/null 2>&1 | FileCheck %s -check-prefix=DISABLED
2313

2414
; Make sure that the outliner is added to the pass pipeline only when the
2515
; appropriate flags/settings are set. Make sure it isn't added otherwise.
2616
;
2717
; Cases where it should be added:
2818
; * -enable-machine-outliner
2919
; * -enable-machine-outliner=always
30-
; * -enable-machine-outliner is not passed (AArch64 supports
31-
; target-default outlining)
20+
; * -enable-machine-outliner=optimistic-pgo
21+
; * -enable-machine-outliner=conservative-pgo
22+
; * -enable-machine-outliner is not passed (AArch64 supports target-default outlining)
3223
;
3324
; Cases where it should not be added:
3425
; * -O0 or equivalent
3526
; * -enable-machine-outliner=never is passed
3627

37-
; ALWAYS: Machine Outliner
28+
; CHECK: Machine Outliner
29+
; DISABLED-NOT: Machine Outliner
3830
; ALWAYS: Machine Outliner: Running on all functions
39-
; ENABLE: Machine Outliner
40-
; ENABLE: Machine Outliner: Running on all functions
41-
; NEVER-NOT: Machine Outliner
42-
; NOT-ADDED: Machine Outliner
43-
; NOT-ADDED: Machine Outliner: Running on target-default functions
44-
; OPTNONE-NOT: Machine Outliner
31+
; OPTIMISTIC: Machine Outliner: Running on optimistically cold functions
32+
; CONSERVATIVE: Machine Outliner: Running on conservatively cold functions
33+
; TARGET-DEFAULT: Machine Outliner: Running on target-default functions
4534

4635
define void @foo() {
4736
ret void;
4837
}
49-

0 commit comments

Comments
 (0)