Skip to content

Commit

Permalink
[Clang][CMake] Use perf-training for Clang-BOLT
Browse files Browse the repository at this point in the history
Leverage the perf-training flow for BOLT profile collection, enabling reproducible
BOLT optimization. Remove the use of a bootstrapped build for profile collection.

Test Plan:
- Regular (single-stage) build
```
$ cmake ... -C .../clang/cmake/caches/BOLT.cmake
$ ninja clang-bolt
...
[21/24] Instrumenting clang binary with BOLT
[21/24] Generating BOLT profile for Clang
[23/24] Merging BOLT fdata
Profile from 2 files merged.
[24/24] Optimizing Clang with BOLT
...
          1291202496 : executed instructions (-1.1%)
            27005133 : taken branches (-71.5%)
...
```
- Two-stage build (ThinLTO+InstPGO)
```
$ cmake ... -C .../clang/cmake/caches/BOLT.cmake -C .../clang/cmake/caches/BOLT-PGO.cmake
$ ninja clang-bolt
$ ninja stage2-clang-bolt
...
[2756/2759] Instrumenting clang binary with BOLT
[2756/2759] Generating BOLT profile for Clang
[2758/2759] Merging BOLT fdata
[2759/2759] Optimizing Clang with BOLT
...
BOLT-INFO: 7092 out of 184104 functions in the binary (3.9%) have non-empty execution profile
           756531927 : executed instructions (-0.5%)
            15399400 : taken branches (-40.3%)
...
```

Reviewed By: beanz

Differential Revision: https://reviews.llvm.org/D143553
  • Loading branch information
aaupov committed May 13, 2023
1 parent c19c248 commit 76b2915
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 69 deletions.
69 changes: 3 additions & 66 deletions clang/CMakeLists.txt
Expand Up @@ -851,9 +851,8 @@ endif()

if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
set(CLANGXX_PATH ${CLANG_PATH}++)
set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
set(BOLT_FDATA ${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/prof.fdata)

# Instrument clang with BOLT
add_custom_target(clang-instrumented
Expand All @@ -863,73 +862,11 @@ if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
DEPENDS clang llvm-bolt
COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
-instrument --instrumentation-file-append-pid
--instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
--instrumentation-file=${BOLT_FDATA}
COMMENT "Instrumenting clang binary with BOLT"
VERBATIM
)

# Make a symlink from clang-bolt.inst to clang++-bolt.inst
# The clang++-instrumented target exists only to give the symlink file a
# target name that other rules can depend on.
add_custom_target(clang++-instrumented
DEPENDS ${CLANGXX_INSTRUMENTED}
)
# File-level rule that creates the symlink; it runs after clang-instrumented.
# The link is named ${CLANGXX_INSTRUMENTED} and points at ${CLANG_INSTRUMENTED}.
add_custom_command(OUTPUT ${CLANGXX_INSTRUMENTED}
DEPENDS clang-instrumented
COMMAND ${CMAKE_COMMAND} -E create_symlink
${CLANG_INSTRUMENTED}
${CLANGXX_INSTRUMENTED}
COMMENT "Creating symlink from BOLT instrumented clang to clang++"
VERBATIM
)

# Build specified targets with instrumented Clang to collect the profile
# Stamp files and build artifacts of the nested build each get their own
# directory under the current binary dir.
set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-stamps/)
set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-bins/)
# $<CONFIG> is a generator expression, so the nested build's --config value
# is resolved at generate time rather than at configure time.
set(build_configuration "$<CONFIG>")
include(ExternalProject)
# Nested CMake build of ${CMAKE_SOURCE_DIR} that uses the instrumented clang
# binaries as its C, C++ and ASM compilers. It is excluded from "all" and has
# no install step; only ${CLANG_BOLT_INSTRUMENT_TARGETS} are built.
ExternalProject_Add(bolt-instrumentation-profile
DEPENDS clang++-instrumented
PREFIX bolt-instrumentation-profile
SOURCE_DIR ${CMAKE_SOURCE_DIR}
STAMP_DIR ${STAMP_DIR}
BINARY_DIR ${BINARY_DIR}
EXCLUDE_FROM_ALL 1
CMAKE_ARGS
${CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS}
# We shouldn't need to set this here, but INSTALL_DIR doesn't
# seem to work, so instead I'm passing this through
-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
-DCMAKE_C_COMPILER=${CLANG_INSTRUMENTED}
-DCMAKE_CXX_COMPILER=${CLANGXX_INSTRUMENTED}
-DCMAKE_ASM_COMPILER=${CLANG_INSTRUMENTED}
-DCMAKE_ASM_COMPILER_ID=Clang
-DCMAKE_BUILD_TYPE=Release
-DLLVM_ENABLE_PROJECTS=${CLANG_BOLT_INSTRUMENT_PROJECTS}
-DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD}
BUILD_COMMAND ${CMAKE_COMMAND} --build ${BINARY_DIR}
--config ${build_configuration}
--target ${CLANG_BOLT_INSTRUMENT_TARGETS}
INSTALL_COMMAND ""
# Requests per-step targets for the configure and build steps; a
# bolt-instrumentation-profile-build target is depended on elsewhere in
# this file.
STEP_TARGETS configure build
USES_TERMINAL_CONFIGURE 1
USES_TERMINAL_BUILD 1
USES_TERMINAL_INSTALL 1
)

# Merge profiles into one using merge-fdata
# The clang-bolt-profile target is a named handle for the merged profile file.
add_custom_target(clang-bolt-profile
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
)
# Rule producing the merged prof.fdata. It runs after the instrumented nested
# build (bolt-instrumentation-profile-build) and invokes the merge-fdata
# subcommand of perf-helper.py with three arguments: the merge-fdata binary,
# the output file, and the current binary directory.
# NOTE(review): the last argument is presumably the directory scanned for raw
# profiles - confirm against perf-helper.py.
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
DEPENDS merge-fdata bolt-instrumentation-profile-build
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
COMMAND ${Python3_EXECUTABLE}
${CMAKE_CURRENT_SOURCE_DIR}/utils/perf-training/perf-helper.py merge-fdata
$<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
${CMAKE_CURRENT_BINARY_DIR}
COMMENT "Preparing BOLT profile"
VERBATIM
)

# Optimize original (pre-bolt) Clang using the collected profile
set(CLANG_OPTIMIZED ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt)
add_custom_target(clang-bolt
Expand All @@ -939,7 +876,7 @@ if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
DEPENDS clang-bolt-profile
COMMAND llvm-bolt ${CLANG_PATH}
-o ${CLANG_OPTIMIZED}
-data ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
-data ${BOLT_FDATA}
-reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions
-split-all-cold -split-eh -dyno-stats -icf=1 -use-gnu-stack
COMMAND ${CMAKE_COMMAND} -E rename ${CLANG_OPTIMIZED} $<TARGET_FILE:clang>
Expand Down
3 changes: 0 additions & 3 deletions clang/cmake/caches/BOLT.cmake
@@ -1,9 +1,6 @@
# CMake cache preset for building Clang with BOLT; loaded with `cmake -C`.
# Each entry is written to the cache with an empty docstring.
set(CMAKE_BUILD_TYPE Release CACHE STRING "")
# Turns on the CLANG_BOLT_INSTRUMENT-guarded rules in the Clang CMake files.
set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
# Passes --emit-relocs and -z now to the linker for every executable.
# NOTE(review): presumably needed so llvm-bolt can rewrite the linked binary -
# confirm against the BOLT documentation.
set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")

# Build only the bolt and clang projects, for the native target only.
set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
Expand Down
23 changes: 23 additions & 0 deletions clang/utils/perf-training/CMakeLists.txt
Expand Up @@ -61,3 +61,26 @@ if(APPLE AND DTRACE AND NOT LLVM_TOOL_LLVM_DRIVER_BUILD)
COMMENT "Generating order file"
DEPENDS generate-dtrace-logs)
endif()

# BOLT profile collection for Clang, driven by a lit suite over the
# perf-training inputs. Only enabled when BOLT instrumentation is requested
# and the build is not itself an instrumented build.
#
# Target chain: clear-bolt-fdata -> generate-bolt-fdata -> clang-bolt-profile.
if(CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
  # Generate the lit site config for the BOLT suite into its own
  # bolt-fdata/ subdirectory of the current binary dir.
  configure_lit_site_cfg(
    ${CMAKE_CURRENT_SOURCE_DIR}/bolt.lit.site.cfg.in
    ${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/lit.site.cfg
  )

  # Run the lit suite with the BOLT-instrumented clang. Old profile data is
  # cleared first (clear-bolt-fdata) and the instrumented binary must exist
  # (clang-instrumented). Not part of check-all.
  add_lit_testsuite(generate-bolt-fdata "Generating BOLT profile for Clang"
    ${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/
    EXCLUDE_FROM_CHECK_ALL
    DEPENDS clang-instrumented clear-bolt-fdata
  )

  # Invoke perf-helper.py's "clean" subcommand on the current binary dir for
  # the "fdata" extension. VERBATIM keeps argument escaping identical across
  # generators and platforms (e.g. build paths containing spaces).
  add_custom_target(clear-bolt-fdata
    COMMAND "${Python3_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py" clean "${CMAKE_CURRENT_BINARY_DIR}" fdata
    COMMENT "Clearing old BOLT fdata"
    VERBATIM)

  # Merge profiles into one using merge-fdata
  # Arguments to the merge-fdata subcommand: the merge-fdata binary, the
  # output file (prof.fdata in the current binary dir), and the current
  # binary dir itself.
  add_custom_target(clang-bolt-profile
    COMMAND "${Python3_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py" merge-fdata $<TARGET_FILE:merge-fdata> "${CMAKE_CURRENT_BINARY_DIR}/prof.fdata" "${CMAKE_CURRENT_BINARY_DIR}"
    COMMENT "Merging BOLT fdata"
    DEPENDS merge-fdata generate-bolt-fdata
    VERBATIM)
endif()
20 changes: 20 additions & 0 deletions clang/utils/perf-training/bolt.lit.cfg
@@ -0,0 +1,20 @@
# -*- Python -*-

from lit import Test
import lit.formats
import lit.util
import os
import subprocess

# Locate the BOLT-instrumented clang binary in the tools directory supplied by
# the site config. lit.util.which returns None when the binary is absent, so
# fail with an explicit diagnostic instead of letting os.path.realpath(None)
# raise an opaque TypeError.
_clang_bolt_inst = lit.util.which('clang-bolt.inst', config.clang_tools_dir)
if _clang_bolt_inst is None:
    lit_config.fatal("Couldn't find 'clang-bolt.inst' in '%s'" % config.clang_tools_dir)
# Resolve symlinks and normalise to forward slashes for use in substitutions.
config.clang = os.path.realpath(_clang_bolt_inst).replace('\\', '/')

config.name = 'Clang Perf Training'
# File suffixes that lit treats as tests in this suite.
config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']

# ShTest receives True only when LIT_USE_INTERNAL_SHELL is exactly "0"; an
# unset variable or any other value passes False.
use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
config.test_format = lit.formats.ShTest(use_lit_shell == "0")
# Every %clang* substitution expands to the instrumented binary. The longer
# patterns are registered before their prefixes (%clang_cpp_skip_driver before
# %clang_cpp, and all of them before %clang).
config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ ' % (config.clang)))
config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ ' % (config.clang)))
config.substitutions.append( ('%clang_skip_driver', ' %s ' % (config.clang)))
config.substitutions.append( ('%clang', ' %s ' % (config.clang) ) )
config.substitutions.append( ('%test_root', config.test_exec_root ) )
14 changes: 14 additions & 0 deletions clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -0,0 +1,14 @@
@LIT_SITE_CFG_IN_HEADER@

# Site configuration template for the BOLT perf-training lit suite.
# The @NAME@ placeholders are filled in when CMake processes this template
# via configure_lit_site_cfg().
# NOTE(review): `sys` is not referenced anywhere in this template's visible body.
import sys

# Directory in which bolt.lit.cfg looks up the 'clang-bolt.inst' binary.
config.clang_tools_dir = lit_config.substitute("@CURRENT_TOOLS_DIR@")
config.perf_helper_dir = "@CMAKE_CURRENT_SOURCE_DIR@"
# Exposed to tests by bolt.lit.cfg as the %test_root substitution.
config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@"
config.test_source_root = "@CLANG_PGO_TRAINING_DATA@"
config.target_triple = "@LLVM_TARGET_TRIPLE@"
config.python_exe = "@Python3_EXECUTABLE@"
# NOTE(review): path() is not defined in this file; presumably it is supplied
# by the generated header above - confirm.
config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")

# Let the main config do the real work.
lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")

0 comments on commit 76b2915

Please sign in to comment.