Skip to content

Commit

Permalink
[compiler-rt] Initial support for builtins on GPU targets
Browse files Browse the repository at this point in the history
Summary:
This patch adds initial support to build the `builtins` library for GPU
targets. Primarily this requires adding a few new architectures for
`amdgcn` and `nvptx64`. I built this using the following invocations.

```console
$ cmake ../compiler-rt -DCMAKE_C_COMPILER=clang
  -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -GNinja
  -DCMAKE_C_COMPILER_TARGET=<nvptx64-nvidia-cuda|amdgcn-amd-amdhsa>
  -DCMAKE_CXX_COMPILER_TARGET=<nvptx64-nvidia-cuda|amdgcn-amd-amdhsa>
  -DCMAKE_C_COMPILER_WORKS=1 -DCMAKE_CXX_COMPILER_WORKS=1
  -DLLVM_CMAKE_DIR=../cmake/Modules -DCOMPILER_RT_DEFAULT_TARGET_ONLY=ON
  -C ../compiler-rt/cmake/caches/GPU.cmake
```

Some pointers would be appreciated for how to test this using a standard
(non-default target only) build.

GPU builds are somewhat finnicky. We only expect this to be built with a
sufficiently new clang, as it's the only compiler that supports the
target and output we distribute. Distribution is done as LLVM-IR blobs.
GPUs have little backward compatibility, so linking object files is
difficult. However, this prevents us from calling these functions
post-LTO as they will have been optimized out. Another issue is the
CMake flag querying functions, currently these fail on nvptx if you
don't have CUDA installed because they want to use the `ptxas` and
`nvlink` binaries.

More work is necessary to build correctly for all targets and ship into
the correct clang resource directory. Additionally we need to use the
`libc` project's support for running unit tests.
  • Loading branch information
jhuber6 committed Jun 12, 2024
1 parent d6bbe2e commit 985c3dd
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 3 deletions.
32 changes: 31 additions & 1 deletion compiler-rt/cmake/Modules/CompilerRTUtils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ macro(test_target_arch arch def)
endmacro()

macro(detect_target_arch)
check_symbol_exists(__AMDGPU__ "" __AMDGPU)
check_symbol_exists(__arm__ "" __ARM)
check_symbol_exists(__AVR__ "" __AVR)
check_symbol_exists(__aarch64__ "" __AARCH64)
Expand All @@ -154,6 +155,7 @@ macro(detect_target_arch)
check_symbol_exists(__loongarch__ "" __LOONGARCH)
check_symbol_exists(__mips__ "" __MIPS)
check_symbol_exists(__mips64__ "" __MIPS64)
check_symbol_exists(__NVPTX__ "" __NVPTX)
check_symbol_exists(__powerpc__ "" __PPC)
check_symbol_exists(__powerpc64__ "" __PPC64)
check_symbol_exists(__powerpc64le__ "" __PPC64LE)
Expand All @@ -164,7 +166,9 @@ macro(detect_target_arch)
check_symbol_exists(__wasm32__ "" __WEBASSEMBLY32)
check_symbol_exists(__wasm64__ "" __WEBASSEMBLY64)
check_symbol_exists(__ve__ "" __VE)
if(__ARM)
if(__AMDGPU)
add_default_target_arch(amdgcn)
elseif(__ARM)
add_default_target_arch(arm)
elseif(__AVR)
add_default_target_arch(avr)
Expand Down Expand Up @@ -192,6 +196,8 @@ macro(detect_target_arch)
add_default_target_arch(mips64)
elseif(__MIPS)
add_default_target_arch(mips)
elseif(__NVPTX)
add_default_target_arch(nvptx64)
elseif(__PPC64) # must be checked before __PPC
add_default_target_arch(powerpc64)
elseif(__PPC64LE)
Expand Down Expand Up @@ -388,6 +394,30 @@ macro(construct_compiler_rt_default_triple)
set(COMPILER_RT_DEFAULT_TARGET_ARCH "i386")
endif()

# If we are directly targeting a GPU we need to check that the compiler is
# compatible and pass some default arguments.
if(COMPILER_RT_DEFAULT_TARGET_ONLY)
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn|nvptx")
# Ensure the compiler is a valid clang when building the GPU target.
set(req_ver "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}")
if(LLVM_VERSION_MAJOR AND NOT (CMAKE_CXX_COMPILER_ID MATCHES "[Cc]lang" AND
${CMAKE_CXX_COMPILER_VERSION} VERSION_EQUAL "${req_ver}"))
message(FATAL_ERROR "Cannot build compiler-rt for GPU. CMake compiler "
"'${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}' "
" is not 'Clang ${req_ver}'.")
endif()
set(COMPILER_RT_GPU_TARGET TRUE)
endif()

# Pass the necessary flags to make flag detection work.
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nogpulib")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "nvptx")
# FIXME: This doesn't work with no CUDA installation.
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -march=sm_75")
endif()
endif()

# Determine if test target triple is specified explicitly, and doesn't match the
# default.
if(NOT COMPILER_RT_DEFAULT_TARGET_TRIPLE STREQUAL LLVM_TARGET_TRIPLE)
Expand Down
4 changes: 4 additions & 0 deletions compiler-rt/cmake/base-config-ix.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,8 @@ macro(test_targets)
test_target_arch(x86_64 "" "")
endif()
endif()
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
test_target_arch(amdgcn "" "--target=amdgcn-amd-amdhsa" "-nogpulib" "-emit-llvm")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "loongarch64")
test_target_arch(loongarch64 "" "")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "powerpc64le|ppc64le")
Expand Down Expand Up @@ -254,6 +256,8 @@ macro(test_targets)
test_target_arch(mips "" "-mips32r2" "-mabi=32" "-D_LARGEFILE_SOURCE=1" "-D_FILE_OFFSET_BITS=64")
test_target_arch(mips64 "" "-mips64r2" "-mabi=64")
endif()
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "nvptx")
test_target_arch(amdgcn "" "--nvptx64-nvidia-cuda" "-nogpulib" "-emit-llvm" "-c")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "arm")
if(WIN32)
test_target_arch(arm "" "" "")
Expand Down
8 changes: 6 additions & 2 deletions compiler-rt/cmake/builtin-config-ix.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ builtin_check_c_compiler_flag(-fno-profile-generate COMPILER_RT_HAS_FNO_PROFILE_
builtin_check_c_compiler_flag(-fno-profile-instr-generate COMPILER_RT_HAS_FNO_PROFILE_INSTR_GENERATE_FLAG)
builtin_check_c_compiler_flag(-fno-profile-instr-use COMPILER_RT_HAS_FNO_PROFILE_INSTR_USE_FLAG)
builtin_check_c_compiler_flag(-Wno-pedantic COMPILER_RT_HAS_WNO_PEDANTIC)
builtin_check_c_compiler_flag(-nogpulib COMPILER_RT_HAS_NOGPULIB)
builtin_check_c_compiler_flag(-emit-llvm COMPILER_RT_HAS_EMIT_LLVM)
builtin_check_c_compiler_flag(-Wbuiltin-declaration-mismatch COMPILER_RT_HAS_WBUILTIN_DECLARATION_MISMATCH_FLAG)
builtin_check_c_compiler_flag(/Zl COMPILER_RT_HAS_ZL_FLAG)

Expand Down Expand Up @@ -52,6 +54,7 @@ else()
set(OS_NAME "${CMAKE_SYSTEM_NAME}")
endif()

set(AMDGPU amdgcn)
set(ARM64 aarch64)
set(ARM32 arm armhf armv4t armv5te armv6 armv6m armv7m armv7em armv7 armv7s armv7k armv8m.base armv8m.main armv8.1m.main)
set(AVR avr)
Expand All @@ -61,6 +64,7 @@ set(X86_64 x86_64)
set(LOONGARCH64 loongarch64)
set(MIPS32 mips mipsel)
set(MIPS64 mips64 mips64el)
set(NVPTX nvptx64)
set(PPC32 powerpc powerpcspe)
set(PPC64 powerpc64 powerpc64le)
set(RISCV32 riscv32)
Expand All @@ -78,8 +82,8 @@ if(APPLE)
endif()

set(ALL_BUILTIN_SUPPORTED_ARCH
${X86} ${X86_64} ${ARM32} ${ARM64} ${AVR}
${HEXAGON} ${MIPS32} ${MIPS64} ${PPC32} ${PPC64}
${X86} ${X86_64} ${AMDGPU} ${ARM32} ${ARM64} ${AVR}
${HEXAGON} ${MIPS32} ${MIPS64} ${NVPTX} ${PPC32} ${PPC64}
${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9}
${WASM32} ${WASM64} ${VE} ${LOONGARCH64})

Expand Down
18 changes: 18 additions & 0 deletions compiler-rt/cmake/caches/GPU.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# This file sets up a CMakeCache for GPU builds of compiler-rt. This supports
# amdgcn and nvptx builds targeting the builtins library.

set(COMPILER_RT_INCLUDE_TESTS OFF CACHE BOOL "")
set(COMPILER_RT_HAS_SAFESTACK OFF CACHE BOOL "")

set(COMPILER_RT_BUILD_BUILTINS ON CACHE BOOL "")
set(COMPILER_RT_BAREMETAL_BUILD ON CACHE BOOL "")
set(COMPILER_RT_BUILD_CRT OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_SANITIZERS OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_XRAY OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_LIBFUZZER OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_PROFILE OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_MEMPROF OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_XRAY_NO_PREINIT OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_SCUDO_SANTDALONE_WITH_LLVM_LIBC OFF CACHE BOOL "")
10 changes: 10 additions & 0 deletions compiler-rt/lib/builtins/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,8 @@ if (MINGW)
)
endif()

set(amdgcn_SOURCES ${GENERIC_SOURCES})

set(armv4t_SOURCES ${arm_min_SOURCES})
set(armv5te_SOURCES ${arm_min_SOURCES})
set(armv6_SOURCES ${arm_min_SOURCES})
Expand Down Expand Up @@ -698,6 +700,8 @@ set(mips64_SOURCES ${GENERIC_TF_SOURCES}
set(mips64el_SOURCES ${GENERIC_TF_SOURCES}
${mips_SOURCES})

set(nvptx64_SOURCES ${GENERIC_SOURCES})

set(powerpc_SOURCES ${GENERIC_SOURCES})

set(powerpcspe_SOURCES ${GENERIC_SOURCES})
Expand Down Expand Up @@ -803,6 +807,12 @@ else ()
endif()
endif()

# Directly targeting the GPU requires a few extra flags.
if(COMPILER_RT_GPU_TARGET)
append_list_if(COMPILER_RT_HAS_NOGPULIB -nogpulib BUILTIN_CFLAGS)
append_list_if(COMPILER_RT_HAS_EMIT_LLVM -emit-llvm BUILTIN_CFLAGS)
endif()

set(BUILTIN_DEFS "")

if(COMPILER_RT_BUILTINS_HIDE_SYMBOLS)
Expand Down

0 comments on commit 985c3dd

Please sign in to comment.