From 4f21fb84479286ddc781d73f8df152f81a8264e7 Mon Sep 17 00:00:00 2001 From: Qiongsi Wu <274595+qiongsiwu@users.noreply.github.com> Date: Mon, 22 Jan 2024 14:54:58 -0500 Subject: [PATCH] [PGO] Reland PGO's Counter Reset and File Dumping APIs #76471 (#78285) https://github.com/llvm/llvm-project/pull/76471 caused buildbot failures on Windows. For more details, see https://github.com/llvm/llvm-project/issues/77546. This PR revises the test and relands https://github.com/llvm/llvm-project/pull/76471. --- .../ExpandModularHeadersPPCallbacks.cpp | 2 +- clang/docs/UsersManual.rst | 104 ++++++++++++++++++ clang/include/clang/Basic/CodeGenOptions.h | 3 + clang/include/clang/Frontend/Utils.h | 4 +- clang/lib/Frontend/CompilerInstance.cpp | 2 +- clang/lib/Frontend/InitPreprocessor.cpp | 23 +++- clang/test/Profile/c-general.c | 10 ++ compiler-rt/include/CMakeLists.txt | 1 + .../include/profile/instr_prof_interface.h | 92 ++++++++++++++++ compiler-rt/lib/profile/InstrProfiling.h | 61 ++-------- .../profile/Linux/instrprof-weak-symbol.c | 16 +++ compiler-rt/test/profile/instrprof-api.c | 49 +++++++++ 12 files changed, 310 insertions(+), 57 deletions(-) create mode 100644 compiler-rt/include/profile/instr_prof_interface.h create mode 100644 compiler-rt/test/profile/Linux/instrprof-weak-symbol.c create mode 100644 compiler-rt/test/profile/instrprof-api.c diff --git a/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.cpp b/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.cpp index e414ac8c77050..5ecd4fb19131e 100644 --- a/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.cpp +++ b/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.cpp @@ -100,7 +100,7 @@ ExpandModularHeadersPPCallbacks::ExpandModularHeadersPPCallbacks( /*OwnsHeaderSearch=*/false); PP->Initialize(Compiler.getTarget(), Compiler.getAuxTarget()); InitializePreprocessor(*PP, *PO, Compiler.getPCHContainerReader(), - Compiler.getFrontendOpts()); + Compiler.getFrontendOpts(), 
Compiler.getCodeGenOpts()); ApplyHeaderSearchOptions(*HeaderInfo, *HSO, LangOpts, Compiler.getTarget().getTriple()); } diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 881d903d91a7e..ff2d4a68b8e55 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2809,6 +2809,110 @@ indexed format, regardeless whether it is produced by frontend or the IR pass. overhead. ``prefer-atomic`` will be transformed to ``atomic`` when supported by the target, or ``single`` otherwise. +Fine Tuning Profile Collection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The PGO infrastructure provides user program knobs to fine tune profile +collection. Specifically, the PGO runtime provides the following functions +that can be used to control the regions in the program where profiles should +be collected. + + * ``void __llvm_profile_set_filename(const char *Name)``: changes the name of + the profile file to ``Name``. + * ``void __llvm_profile_reset_counters(void)``: resets all counters to zero. + * ``int __llvm_profile_dump(void)``: writes the profile data to disk. + * ``int __llvm_orderfile_dump(void)``: writes the order file to disk. + +For example, the following pattern can be used to skip profiling program +initialization, profile two specific hot regions, and skip profiling program +cleanup: + +.. code-block:: c + + int main() { + initialize(); + + // Reset all profile counters to 0 to omit profile collected during + // initialize()'s execution. + __llvm_profile_reset_counters(); + ... hot region 1 + // Dump the profile for hot region 1. + __llvm_profile_set_filename("region1.profraw"); + __llvm_profile_dump(); + + // Reset counters before proceeding to hot region 2. + __llvm_profile_reset_counters(); + ... hot region 2 + // Dump the profile for hot region 2. 
+ __llvm_profile_set_filename("region2.profraw"); + __llvm_profile_dump(); + + // Since the profile has been dumped, no further profile data + // will be collected beyond the above __llvm_profile_dump(). + cleanup(); + return 0; + } + +These APIs' names can be introduced to user programs in two ways. +They can be declared as weak symbols on platforms which support +treating weak symbols as ``null`` during linking. For example, the user can +have + +.. code-block:: c + + __attribute__((weak)) int __llvm_profile_dump(void); + + // Then later in the same source file + if (__llvm_profile_dump) + if (__llvm_profile_dump() != 0) { ... } + // The first if condition tests if the symbol is actually defined. + // Profile dumping only happens if the symbol is defined. Hence, + // the user program works correctly during normal (not profile-generate) + // executions. + +Alternatively, the user program can include the header +``profile/instr_prof_interface.h``, which contains the API names. For example, + +.. code-block:: c + + #include "profile/instr_prof_interface.h" + + // Then later in the same source file + if (__llvm_profile_dump() != 0) { ... } + +The user code does not need to check if the API names are defined, because +these names are automatically replaced by ``(0)`` or the equivalent of a no-op +if ``clang`` is not compiling for profile generation. + +Such replacement can happen because ``clang`` adds one of two macros depending +on the ``-fprofile-generate`` and the ``-fprofile-use`` flags. + + * ``__LLVM_INSTR_PROFILE_GENERATE``: defined when one of + ``-fprofile[-instr]-generate``/``-fcs-profile-generate`` is in effect. + * ``__LLVM_INSTR_PROFILE_USE``: defined when one of + ``-fprofile-use``/``-fprofile-instr-use`` is in effect. + +The two macros can be used to provide more flexibility so a user program +can execute code specifically intended for profile generate or profile use. 
+For example, a user program can have special logging during profile generate: + +.. code-block:: c + + #if __LLVM_INSTR_PROFILE_GENERATE + expensive_logging_of_full_program_state(); + #endif + +The logging is automatically excluded during a normal build of the program, +hence it does not impact performance during a normal execution. + +It is advised to use such fine tuning only in a program's cold regions. The weak +symbols can introduce extra control flow (the ``if`` checks), while the macros +(hence declarations they guard in ``profile/instr_prof_interface.h``) +can change the control flow of the functions that use them between profile +generation and profile use (which can lead to discarded counters in such +functions). Using these APIs in the program's cold regions introduces less +overhead and leads to more optimized code. + Disabling Instrumentation ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index 6952b48e898a8..3f8fe385fef3d 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -494,6 +494,9 @@ class CodeGenOptions : public CodeGenOptionsBase { return getProfileInstr() == ProfileCSIRInstr; } + /// Check if any form of instrumentation is on. + bool hasProfileInstr() const { return getProfileInstr() != ProfileNone; } + /// Check if Clang profile use is on. bool hasProfileClangUse() const { return getProfileUse() == ProfileClangInstr; diff --git a/clang/include/clang/Frontend/Utils.h b/clang/include/clang/Frontend/Utils.h index 143cf4359f00b..604e42067a3f1 100644 --- a/clang/include/clang/Frontend/Utils.h +++ b/clang/include/clang/Frontend/Utils.h @@ -43,12 +43,14 @@ class PCHContainerReader; class Preprocessor; class PreprocessorOptions; class PreprocessorOutputOptions; +class CodeGenOptions; /// InitializePreprocessor - Initialize the preprocessor getting it and the /// environment ready to process a single file. 
void InitializePreprocessor(Preprocessor &PP, const PreprocessorOptions &PPOpts, const PCHContainerReader &PCHContainerRdr, - const FrontendOptions &FEOpts); + const FrontendOptions &FEOpts, + const CodeGenOptions &CodeGenOpts); /// DoPrintPreprocessedInput - Implement -E mode. void DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS, diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index c258870072613..a25aa88bd85ef 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -470,7 +470,7 @@ void CompilerInstance::createPreprocessor(TranslationUnitKind TUKind) { // Predefine macros and configure the preprocessor. InitializePreprocessor(*PP, PPOpts, getPCHContainerReader(), - getFrontendOpts()); + getFrontendOpts(), getCodeGenOpts()); // Initialize the header search object. In CUDA compilations, we use the aux // triple (the host triple) to initialize our header search, since we need to diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index d83128adb511e..fe0fd3614113c 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -1364,12 +1364,22 @@ static void InitializePredefinedMacros(const TargetInfo &TI, TI.getTargetDefines(LangOpts, Builder); } +static void InitializePGOProfileMacros(const CodeGenOptions &CodeGenOpts, + MacroBuilder &Builder) { + if (CodeGenOpts.hasProfileInstr()) + Builder.defineMacro("__LLVM_INSTR_PROFILE_GENERATE"); + + if (CodeGenOpts.hasProfileIRUse() || CodeGenOpts.hasProfileClangUse()) + Builder.defineMacro("__LLVM_INSTR_PROFILE_USE"); +} + /// InitializePreprocessor - Initialize the preprocessor getting it and the /// environment ready to process a single file. 
-void clang::InitializePreprocessor( - Preprocessor &PP, const PreprocessorOptions &InitOpts, - const PCHContainerReader &PCHContainerRdr, - const FrontendOptions &FEOpts) { +void clang::InitializePreprocessor(Preprocessor &PP, + const PreprocessorOptions &InitOpts, + const PCHContainerReader &PCHContainerRdr, + const FrontendOptions &FEOpts, + const CodeGenOptions &CodeGenOpts) { const LangOptions &LangOpts = PP.getLangOpts(); std::string PredefineBuffer; PredefineBuffer.reserve(4080); @@ -1416,6 +1426,11 @@ void clang::InitializePreprocessor( InitializeStandardPredefinedMacros(PP.getTargetInfo(), PP.getLangOpts(), FEOpts, Builder); + // The PGO instrumentation profile macros are driven by options + // -fprofile[-instr]-generate/-fcs-profile-generate/-fprofile[-instr]-use, + // hence they are not guarded by InitOpts.UsePredefines. + InitializePGOProfileMacros(CodeGenOpts, Builder); + // Add on the predefines from the driver. Wrap in a #line directive to report // that they come from the command line. Builder.append("# 1 \"\" 1"); diff --git a/clang/test/Profile/c-general.c b/clang/test/Profile/c-general.c index b841f9c3d2a1d..2f621ec9b0bf9 100644 --- a/clang/test/Profile/c-general.c +++ b/clang/test/Profile/c-general.c @@ -9,6 +9,16 @@ // Also check compatibility with older profiles. 
// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-general.c %s -o - -emit-llvm -fprofile-instrument-use-path=%S/Inputs/c-general.profdata.v1 | FileCheck -allow-deprecated-dag-overlap -check-prefix=PGOUSE %s +// RUN: %clang -fprofile-generate -E -dM %s | FileCheck -match-full-lines -check-prefix=PROFGENMACRO %s +// RUN: %clang -fprofile-instr-generate -E -dM %s | FileCheck -match-full-lines -check-prefix=PROFGENMACRO %s +// RUN: %clang -fcs-profile-generate -E -dM %s | FileCheck -match-full-lines -check-prefix=PROFGENMACRO %s +// +// RUN: %clang -fprofile-use=%t.profdata -E -dM %s | FileCheck -match-full-lines -check-prefix=PROFUSEMACRO %s +// RUN: %clang -fprofile-instr-use=%t.profdata -E -dM %s | FileCheck -match-full-lines -check-prefix=PROFUSEMACRO %s + +// PROFGENMACRO:#define __LLVM_INSTR_PROFILE_GENERATE 1 +// PROFUSEMACRO:#define __LLVM_INSTR_PROFILE_USE 1 + // PGOGEN: @[[SLC:__profc_simple_loops]] = private global [4 x i64] zeroinitializer // PGOGEN: @[[IFC:__profc_conditionals]] = private global [13 x i64] zeroinitializer // PGOGEN: @[[EEC:__profc_early_exits]] = private global [9 x i64] zeroinitializer diff --git a/compiler-rt/include/CMakeLists.txt b/compiler-rt/include/CMakeLists.txt index 78427beedb3cc..7a100c66bbcfd 100644 --- a/compiler-rt/include/CMakeLists.txt +++ b/compiler-rt/include/CMakeLists.txt @@ -44,6 +44,7 @@ endif(COMPILER_RT_BUILD_ORC) if (COMPILER_RT_BUILD_PROFILE) set(PROFILE_HEADERS profile/InstrProfData.inc + profile/instr_prof_interface.h ) endif(COMPILER_RT_BUILD_PROFILE) diff --git a/compiler-rt/include/profile/instr_prof_interface.h b/compiler-rt/include/profile/instr_prof_interface.h new file mode 100644 index 0000000000000..be40f2685934b --- /dev/null +++ b/compiler-rt/include/profile/instr_prof_interface.h @@ -0,0 +1,92 @@ +/*===---- instr_prof_interface.h - Instrumentation PGO User Program API ----=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + * + * This header provides a public interface for fine-grained control of counter + * reset and profile dumping. These interface functions can be directly called + * in user programs. + * +\*===---------------------------------------------------------------------===*/ + +#ifndef COMPILER_RT_INSTR_PROFILING +#define COMPILER_RT_INSTR_PROFILING + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __LLVM_INSTR_PROFILE_GENERATE +// Profile file reset and dump interfaces. +// When `-fprofile[-instr]-generate`/`-fcs-profile-generate` is in effect, +// clang defines __LLVM_INSTR_PROFILE_GENERATE to pick up the API calls. + +/*! + * \brief Set the filename for writing instrumentation data. + * + * Sets the filename to be used for subsequent calls to + * \a __llvm_profile_write_file(). + * + * \c Name is not copied, so it must remain valid. Passing NULL resets the + * filename logic to the default behaviour. + * + * Note: There may be multiple copies of the profile runtime (one for each + * instrumented image/DSO). This API only modifies the filename within the + * copy of the runtime available to the calling image. + * + * Warning: This is a no-op if continuous mode (\ref + * __llvm_profile_is_continuous_mode_enabled) is on. The reason for this is + * that in continuous mode, profile counters are mmap()'d to the profile at + * program initialization time. Support for transferring the mmap'd profile + * counts to a new file has not been implemented. + */ +void __llvm_profile_set_filename(const char *Name); + +/*! + * \brief Interface to set all PGO counters to zero for the current process. + * + */ +void __llvm_profile_reset_counters(void); + +/*! + * \brief this is a wrapper interface to \c __llvm_profile_write_file. 
+ * After this interface is invoked, an already dumped flag will be set + * so that profile won't be dumped again during program exit. + * Invocation of interface __llvm_profile_reset_counters will clear + * the flag. This interface is designed to be used to collect profile + * data from user selected hot regions. The use model is + * __llvm_profile_reset_counters(); + * ... hot region 1 + * __llvm_profile_dump(); + * .. some other code + * __llvm_profile_reset_counters(); + * ... hot region 2 + * __llvm_profile_dump(); + * + * It is expected that on-line profile merging is on with \c %m specifier + * used in profile filename . If merging is not turned on, user is expected + * to invoke __llvm_profile_set_filename to specify different profile names + * for different regions before dumping to avoid profile write clobbering. + */ +int __llvm_profile_dump(void); + +// Interface to dump the current process' order file to disk. +int __llvm_orderfile_dump(void); + +#else + +#define __llvm_profile_set_filename(Name) +#define __llvm_profile_reset_counters() +#define __llvm_profile_dump() (0) +#define __llvm_orderfile_dump() (0) + +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h index 137115996748c..0123908336918 100644 --- a/compiler-rt/lib/profile/InstrProfiling.h +++ b/compiler-rt/lib/profile/InstrProfiling.h @@ -12,6 +12,17 @@ #include "InstrProfilingPort.h" #include +// Make sure __LLVM_INSTR_PROFILE_GENERATE is always defined before +// including instr_prof_interface.h so the interface functions are +// declared correctly for the runtime. +// __LLVM_INSTR_PROFILE_GENERATE is always `#undef`ed after the header, +// because compiler-rt does not support profiling the profiling runtime itself. 
+#ifndef __LLVM_INSTR_PROFILE_GENERATE +#define __LLVM_INSTR_PROFILE_GENERATE +#endif +#include "profile/instr_prof_interface.h" +#undef __LLVM_INSTR_PROFILE_GENERATE + #define INSTR_PROF_VISIBILITY COMPILER_RT_VISIBILITY #include "profile/InstrProfData.inc" @@ -100,12 +111,6 @@ ValueProfNode *__llvm_profile_begin_vnodes(); ValueProfNode *__llvm_profile_end_vnodes(); uint32_t *__llvm_profile_begin_orderfile(); -/*! - * \brief Clear profile counters to zero. - * - */ -void __llvm_profile_reset_counters(void); - /*! * \brief Merge profile data from buffer. * @@ -156,50 +161,6 @@ void __llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data, int __llvm_profile_write_file(void); int __llvm_orderfile_write_file(void); -/*! - * \brief this is a wrapper interface to \c __llvm_profile_write_file. - * After this interface is invoked, an already dumped flag will be set - * so that profile won't be dumped again during program exit. - * Invocation of interface __llvm_profile_reset_counters will clear - * the flag. This interface is designed to be used to collect profile - * data from user selected hot regions. The use model is - * __llvm_profile_reset_counters(); - * ... hot region 1 - * __llvm_profile_dump(); - * .. some other code - * __llvm_profile_reset_counters(); - * ... hot region 2 - * __llvm_profile_dump(); - * - * It is expected that on-line profile merging is on with \c %m specifier - * used in profile filename . If merging is not turned on, user is expected - * to invoke __llvm_profile_set_filename to specify different profile names - * for different regions before dumping to avoid profile write clobbering. - */ -int __llvm_profile_dump(void); - -int __llvm_orderfile_dump(void); - -/*! - * \brief Set the filename for writing instrumentation data. - * - * Sets the filename to be used for subsequent calls to - * \a __llvm_profile_write_file(). - * - * \c Name is not copied, so it must remain valid. 
Passing NULL resets the - * filename logic to the default behaviour. - * - * Note: There may be multiple copies of the profile runtime (one for each - * instrumented image/DSO). This API only modifies the filename within the - * copy of the runtime available to the calling image. - * - * Warning: This is a no-op if continuous mode (\ref - * __llvm_profile_is_continuous_mode_enabled) is on. The reason for this is - * that in continuous mode, profile counters are mmap()'d to the profile at - * program initialization time. Support for transferring the mmap'd profile - * counts to a new file has not been implemented. - */ -void __llvm_profile_set_filename(const char *Name); /*! * \brief Set the FILE object for writing instrumentation data. Return 0 if set diff --git a/compiler-rt/test/profile/Linux/instrprof-weak-symbol.c b/compiler-rt/test/profile/Linux/instrprof-weak-symbol.c new file mode 100644 index 0000000000000..eda299cb6610e --- /dev/null +++ b/compiler-rt/test/profile/Linux/instrprof-weak-symbol.c @@ -0,0 +1,16 @@ +// Test the linker feature that treats undefined weak symbols as null values. + +// RUN: %clang_pgogen -o %t %s +// RUN: not %t +// RUN: %clang -o %t %s +// RUN: %t + +__attribute__((weak)) void __llvm_profile_reset_counters(void); + +int main() { + if (__llvm_profile_reset_counters) { + __llvm_profile_reset_counters(); + return 1; + } + return 0; +} diff --git a/compiler-rt/test/profile/instrprof-api.c b/compiler-rt/test/profile/instrprof-api.c new file mode 100644 index 0000000000000..fedec2d1afc48 --- /dev/null +++ b/compiler-rt/test/profile/instrprof-api.c @@ -0,0 +1,49 @@ +// UNSUPPORTED: target={{.*windows.*}} +// __llvm_orderfile_dump() is not supported on Windows. + +// Testing profile generate. +// RUN: %clang_profgen %s -S -emit-llvm -o - | FileCheck %s --check-prefix=PROFGEN +// RUN: %clang_pgogen %s -S -emit-llvm -o - | FileCheck %s --check-prefix=PROFGEN + +// Testing profile use. Generate some profile file first. 
+// RUN: rm -rf rawprof.profraw +// RUN: %clang_profgen -o %t1 %s +// RUN: %run %t1 +// RUN: llvm-profdata merge -o %t1.profdata rawprof.profraw +// RUN: %clang_profuse=%t1.profdata %s -S -emit-llvm -o - | FileCheck %s --check-prefix=PROFUSE +// RUN: rm -rf rawprof.profraw +// RUN: %clang_pgogen -o %t2 %s +// RUN: %run %t2 +// RUN: llvm-profdata merge -o %t2.profdata rawprof.profraw +// RUN: %clang_pgouse=%t2.profdata %s -S -emit-llvm -o - | FileCheck %s --check-prefix=PROFUSE +#include "profile/instr_prof_interface.h" + +__attribute__((noinline)) int bar() { return 4; } + +int foo() { + __llvm_profile_reset_counters(); + // PROFGEN: call void @__llvm_profile_reset_counters() + // PROFUSE-NOT: call void @__llvm_profile_reset_counters() + return bar(); +} + +// PROFUSE-NOT: declare void @__llvm_profile_reset_counters() + +int main() { + int z = foo() + 3; + __llvm_profile_set_filename("rawprof.profraw"); + // PROFGEN: call void @__llvm_profile_set_filename(ptr noundef @{{.*}}) + // PROFUSE-NOT: call void @__llvm_profile_set_filename(ptr noundef @{{.*}}) + if (__llvm_profile_dump()) + return 2; + // PROFGEN: %{{.*}} = call {{(signext )*}}i32 @__llvm_profile_dump() + // PROFUSE-NOT: %{{.*}} = call {{(signext )*}}i32 @__llvm_profile_dump() + __llvm_orderfile_dump(); + // PROFGEN: %{{.*}} = call {{(signext )*}}i32 @__llvm_orderfile_dump() + // PROFUSE-NOT: %{{.*}} = call {{(signext )*}}i32 @__llvm_orderfile_dump() + return z + bar() - 11; +} + +// PROFUSE-NOT: declare void @__llvm_profile_set_filename(ptr noundef) +// PROFUSE-NOT: declare signext i32 @__llvm_profile_dump() +// PROFUSE-NOT: declare signext i32 @__llvm_orderfile_dump()