diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index e61558c59bf0d..c1f570d688793 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -229,6 +229,23 @@ static bool dontUseFastISelFor(const Function &Fn) {
   });
 }
 
+/// Decide whether instruction selection should request the profile-driven
+/// analyses (branch probabilities and block frequencies). They are always
+/// requested when optimizing. At -O0 they are requested only if the
+/// TargetMachine carries PGO options whose action is IRUse or SampleUse, or
+/// whose context-sensitive action is CSIRUse.
+static bool maintainPGOProfile(const TargetMachine &TM,
+                               CodeGenOptLevel OptLevel) {
+  if (OptLevel != CodeGenOptLevel::None)
+    return true;
+  const auto &PGOOpt = TM.getPGOOption();
+  if (!PGOOpt)
+    return false;
+  return PGOOpt->Action == PGOOptions::PGOAction::IRUse ||
+         PGOOpt->Action == PGOOptions::PGOAction::SampleUse ||
+         PGOOpt->CSAction == PGOOptions::CSPGOAction::CSIRUse;
+}
+
 namespace llvm {
 
   //===--------------------------------------------------------------------===//
@@ -390,6 +407,7 @@ SelectionDAGISel::~SelectionDAGISel() { delete CurDAG; }
 
 void SelectionDAGISelLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
   CodeGenOptLevel OptLevel = Selector->OptLevel;
+  bool RegisterPGOPasses = maintainPGOProfile(Selector->TM, OptLevel);
   if (OptLevel != CodeGenOptLevel::None)
       AU.addRequired();
   AU.addRequired();
@@ -398,15 +416,15 @@ void SelectionDAGISelLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired();
   AU.addRequired();
   AU.addRequired();
-  if (UseMBPI && OptLevel != CodeGenOptLevel::None)
-      AU.addRequired();
+  if (UseMBPI && RegisterPGOPasses)
+    AU.addRequired();
   AU.addRequired();
   // AssignmentTrackingAnalysis only runs if assignment tracking is enabled for
   // the module.
   AU.addRequired();
   AU.addPreserved();
-  if (OptLevel != CodeGenOptLevel::None)
-      LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
+  if (RegisterPGOPasses)
+    LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
@@ -459,6 +477,7 @@ void SelectionDAGISel::initializeAnalysisResults(
   (void)MatchFilterFuncName;
 #endif
 
+  bool RegisterPGOPasses = maintainPGOProfile(TM, OptLevel);
   TII = MF->getSubtarget().getInstrInfo();
   TLI = MF->getSubtarget().getTargetLowering();
   RegInfo = &MF->getRegInfo();
@@ -469,7 +488,7 @@ void SelectionDAGISel::initializeAnalysisResults(
   auto *PSI = MAMP.getCachedResult(*Fn.getParent());
   BlockFrequencyInfo *BFI = nullptr;
   FAM.getResult(Fn);
-  if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOptLevel::None)
+  if (PSI && PSI->hasProfileSummary() && RegisterPGOPasses)
     BFI = &FAM.getResult(Fn);
 
   FunctionVarLocs const *FnVarLocs = nullptr;
@@ -487,7 +506,7 @@ void SelectionDAGISel::initializeAnalysisResults(
   // into account). That's unfortunate but OK because it just means we won't
   // ask for passes that have been required anyway.
 
-  if (UseMBPI && OptLevel != CodeGenOptLevel::None)
+  if (UseMBPI && RegisterPGOPasses)
     FuncInfo->BPI = &FAM.getResult(Fn);
   else
     FuncInfo->BPI = nullptr;
@@ -513,6 +532,7 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) {
   (void)MatchFilterFuncName;
 #endif
 
+  bool RegisterPGOPasses = maintainPGOProfile(TM, OptLevel);
   TII = MF->getSubtarget().getInstrInfo();
   TLI = MF->getSubtarget().getTargetLowering();
   RegInfo = &MF->getRegInfo();
@@ -523,7 +543,7 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) {
   AC = &MFP.getAnalysis().getAssumptionCache(Fn);
   auto *PSI = &MFP.getAnalysis().getPSI();
   BlockFrequencyInfo *BFI = nullptr;
-  if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOptLevel::None)
+  if (PSI && PSI->hasProfileSummary() && RegisterPGOPasses)
     BFI = &MFP.getAnalysis().getBFI();
 
   FunctionVarLocs const *FnVarLocs = nullptr;
@@ -544,7 +564,7 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) {
   // into account). That's unfortunate but OK because it just means we won't
   // ask for passes that have been required anyway.
 
-  if (UseMBPI && OptLevel != CodeGenOptLevel::None)
+  if (UseMBPI && RegisterPGOPasses)
     FuncInfo->BPI =
         &MFP.getAnalysis().getBPI();
   else
diff --git a/llvm/test/CodeGen/X86/pgo-profile-o0.ll b/llvm/test/CodeGen/X86/pgo-profile-o0.ll
new file mode 100644
index 0000000000000..f9704fcf0ec3a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pgo-profile-o0.ll
@@ -0,0 +1,49 @@
+; RUN: llc -mtriple=x86_64-- -O0 -pgo-kind=pgo-sample-use-pipeline -debug-pass=Structure %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=PASSES
+; RUN: llc -mtriple=x86_64-- -O0 -pgo-kind=pgo-sample-use-pipeline -debug-only=branch-prob %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=BRANCH_PROB
+; RUN: llc -mtriple=x86_64-- -O0 -pgo-kind=pgo-sample-use-pipeline -stop-after=finalize-isel %s -o - | FileCheck %s --check-prefix=MIR
+
+; REQUIRES: asserts
+
+; This test verifies that PGO profile information (branch weights) is preserved
+; during instruction selection at -O0.
+
+; Test function with explicit branch weights from PGO.
+define i32 @test_pgo_preservation(i32 %x) !prof !15 {
+entry:
+  %cmp = icmp sgt i32 %x, 10
+  ; This branch has bias: 97 taken vs 3 not taken
+  br i1 %cmp, label %if.then, label %if.else, !prof !16
+
+if.then:
+  ; Hot path - should have high frequency
+  %add = add nsw i32 %x, 100
+  br label %if.end
+
+if.else:
+  ; Cold path - should have low frequency
+  %sub = sub nsw i32 %x, 50
+  br label %if.end
+
+if.end:
+  %result = phi i32 [ %add, %if.then ], [ %sub, %if.else ]
+  ret i32 %result
+}
+
+; Profile metadata with branch weights 97:3.
+!15 = !{!"function_entry_count", i64 100}
+!16 = !{!"branch_weights", i32 97, i32 3}
+
+; Verify that Branch Probability Analysis runs at O0.
+; PASSES: Branch Probability Analysis
+
+; Verify that the branch probabilities reflect the exact profile data.
+; BRANCH_PROB: ---- Branch Probability Info : test_pgo_preservation ----
+; BRANCH_PROB: set edge entry -> 0 successor probability to {{.*}} = 97.00%
+; BRANCH_PROB: set edge entry -> 1 successor probability to {{.*}} = 3.00%
+
+; Verify that machine IR preserves the branch probabilities from profile data
+; MIR: bb.0.entry:
+; MIR-NEXT: successors: %bb.{{[0-9]+}}({{0x03d70a3d|0x7c28f5c3}}), %bb.{{[0-9]+}}({{0x7c28f5c3|0x03d70a3d}})
+; The two successor probability values should be:
+; - 0x7c28f5c3: approximately 97% (high probability successor)
+; - 0x03d70a3d: approximately 3% (low probability successor)
diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp
index a2327fbc3b66a..9d51e952fa341 100644
--- a/llvm/tools/llc/llc.cpp
+++ b/llvm/tools/llc/llc.cpp
@@ -44,6 +44,7 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/PGOOptions.h"
 #include "llvm/Support/PluginLoader.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetSelect.h"
@@ -239,6 +240,40 @@ static cl::opt> RunPass(
     cl::desc("Run compiler only for specified passes (comma separated list)"),
     cl::value_desc("pass-name"), cl::location(RunPassOpt));
 
+// PGO command line options
+enum PGOKind {
+  NoPGO,
+  SampleUse,
+};
+
+static cl::opt<PGOKind>
+    PGOKindFlag("pgo-kind", cl::init(NoPGO), cl::Hidden,
+                cl::desc("The kind of profile guided optimization"),
+                cl::values(clEnumValN(NoPGO, "nopgo", "Do not use PGO."),
+                           clEnumValN(SampleUse, "pgo-sample-use-pipeline",
+                                      "Use sampled profile to guide PGO.")));
+
+/// Set the PGO options on \p TM according to the -pgo-kind flag. The
+/// TargetMachine is left untouched when no PGO kind was requested.
+static void setPGOOptions(TargetMachine &TM) {
+  std::optional<PGOOptions> PGOOpt;
+
+  switch (PGOKindFlag) {
+  case SampleUse:
+    // Use default values for the other PGOOptions parameters. This mode
+    // is used to test that PGO data is preserved at -O0.
+    PGOOpt = PGOOptions("", "", "", "", PGOOptions::SampleUse,
+                        PGOOptions::NoCSAction);
+    break;
+  case NoPGO:
+    // Nothing to configure; PGOOpt stays empty.
+    break;
+  }
+
+  if (PGOOpt)
+    TM.setPGOOption(PGOOpt);
+}
+
 static int compileModule(char **, LLVMContext &);
 
 [[noreturn]] static void reportError(Twine Msg, StringRef Filename = "") {
@@ -554,6 +589,9 @@ static int compileModule(char **argv, LLVMContext &Context) {
           TheTriple, CPUStr, FeaturesStr, Options, RM, CM, OLvl));
       assert(Target && "Could not allocate target machine!");
 
+      // Set PGO options based on command line flags
+      setPGOOptions(*Target);
+
       return Target->createDataLayout().getStringRepresentation();
     };
     if (InputLanguage == "mir" ||
@@ -597,6 +635,9 @@ static int compileModule(char **argv, LLVMContext &Context) {
         TheTriple, CPUStr, FeaturesStr, Options, RM, CM, OLvl));
     assert(Target && "Could not allocate target machine!");
 
+    // Set PGO options based on command line flags
+    setPGOOptions(*Target);
+
     // If we don't have a module then just exit now. We do this down
     // here since the CPU/Feature help is underneath the target machine
     // creation.