Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[compiler-rt] Initial support for builtins on GPU targets #95304

Merged
merged 1 commit into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions compiler-rt/cmake/Modules/AddCompilerRT.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,9 @@ function(add_compiler_rt_runtime name type)
${ARGN})
set(libnames)
# Until we support this some other way, build compiler-rt runtime without LTO
# to allow non-LTO projects to link with it.
if(COMPILER_RT_HAS_FNO_LTO_FLAG)
# to allow non-LTO projects to link with it. GPU targets can currently only be
# distributed as LLVM-IR and ignore this.
if(COMPILER_RT_HAS_FNO_LTO_FLAG AND NOT COMPILER_RT_GPU_BUILD)
set(NO_LTO_FLAGS "-fno-lto")
else()
set(NO_LTO_FLAGS "")
Expand Down
2 changes: 1 addition & 1 deletion compiler-rt/cmake/Modules/BuiltinTests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ endfunction()
function(builtin_check_c_compiler_flag flag output)
if(NOT DEFINED ${output})
message(STATUS "Performing Test ${output}")
try_compile_only(result FLAGS ${flag})
try_compile_only(result FLAGS ${flag} ${CMAKE_REQUIRED_FLAGS})
set(${output} ${result} CACHE INTERNAL "Compiler supports ${flag}")
if(${result})
message(STATUS "Performing Test ${output} - Success")
Expand Down
27 changes: 26 additions & 1 deletion compiler-rt/cmake/Modules/CompilerRTUtils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ macro(test_target_arch arch def)
endmacro()

macro(detect_target_arch)
check_symbol_exists(__AMDGPU__ "" __AMDGPU)
check_symbol_exists(__arm__ "" __ARM)
check_symbol_exists(__AVR__ "" __AVR)
check_symbol_exists(__aarch64__ "" __AARCH64)
Expand All @@ -154,6 +155,7 @@ macro(detect_target_arch)
check_symbol_exists(__loongarch__ "" __LOONGARCH)
check_symbol_exists(__mips__ "" __MIPS)
check_symbol_exists(__mips64__ "" __MIPS64)
check_symbol_exists(__NVPTX__ "" __NVPTX)
check_symbol_exists(__powerpc__ "" __PPC)
check_symbol_exists(__powerpc64__ "" __PPC64)
check_symbol_exists(__powerpc64le__ "" __PPC64LE)
Expand All @@ -164,7 +166,9 @@ macro(detect_target_arch)
check_symbol_exists(__wasm32__ "" __WEBASSEMBLY32)
check_symbol_exists(__wasm64__ "" __WEBASSEMBLY64)
check_symbol_exists(__ve__ "" __VE)
if(__ARM)
if(__AMDGPU)
add_default_target_arch(amdgcn)
elseif(__ARM)
add_default_target_arch(arm)
elseif(__AVR)
add_default_target_arch(avr)
Expand Down Expand Up @@ -192,6 +196,8 @@ macro(detect_target_arch)
add_default_target_arch(mips64)
elseif(__MIPS)
add_default_target_arch(mips)
elseif(__NVPTX)
add_default_target_arch(nvptx64)
elseif(__PPC64) # must be checked before __PPC
add_default_target_arch(powerpc64)
elseif(__PPC64LE)
Expand Down Expand Up @@ -388,6 +394,21 @@ macro(construct_compiler_rt_default_triple)
set(COMPILER_RT_DEFAULT_TARGET_ARCH "i386")
endif()

# If we are directly targeting a GPU we need to check that the compiler is
# compatible and pass some default arguments.
if(COMPILER_RT_DEFAULT_TARGET_ONLY)

# Pass the necessary flags to make flag detection work.
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
set(COMPILER_RT_GPU_BUILD ON)
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nogpulib")
arsenm marked this conversation as resolved.
Show resolved Hide resolved
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "nvptx")
set(COMPILER_RT_GPU_BUILD ON)
set(CMAKE_REQUIRED_FLAGS
"${CMAKE_REQUIRED_FLAGS} -flto -c -Wno-unused-command-line-argument")
endif()
endif()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be extracted somehow? I suspect that we need this in base-config-ix.cmake as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the difference? I thought this was the one place that checked the default argument.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think for a regular build (not cross-compiling directly) it will be fine, since it will just use clang, which should hopefully be supported by the user's system. This is mostly required for when the Runtimes build invokes it with CMAKE_C_TARGET=nvptx64-nvidia-cuda.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Specifying CMAKE_C_TARGET doesn't imply cross-compilation, though. I could use an x64 compiler hosted on x64 and still specify the x64 target, and that would not be cross-compilation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True, but I'm just going off of the expected case where the compiler has an implicit --target= version. I'm assuming the base logic just checks if it can target any of them? The only logic I saw for that was in base-config-ix.cmake and it went off of the default target, so I'm unsure where else to modify.


# Determine if test target triple is specified explicitly, and doesn't match the
# default.
if(NOT COMPILER_RT_DEFAULT_TARGET_TRIPLE STREQUAL LLVM_TARGET_TRIPLE)
Expand Down Expand Up @@ -466,6 +487,10 @@ function(get_compiler_rt_target arch variable)
endif()
endif()
set(target "${arch}${triple_suffix}")
elseif("${arch}" MATCHES "^amdgcn")
set(target "amdgcn-amd-amdhsa")
elseif("${arch}" MATCHES "^nvptx")
set(target "nvptx64-nvidia-cuda")
else()
set(target "${arch}${triple_suffix}")
endif()
Expand Down
7 changes: 7 additions & 0 deletions compiler-rt/cmake/base-config-ix.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,10 @@ macro(test_targets)
test_target_arch(x86_64 "" "")
endif()
endif()
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
test_target_arch(amdgcn "" "--target=amdgcn-amd-amdhsa" "-nogpulib"
"-flto" "-fconvergent-functions"
"-Xclang -mcode-object-version=none")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "loongarch64")
test_target_arch(loongarch64 "" "")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "powerpc64le|ppc64le")
Expand Down Expand Up @@ -254,6 +258,9 @@ macro(test_targets)
test_target_arch(mips "" "-mips32r2" "-mabi=32" "-D_LARGEFILE_SOURCE=1" "-D_FILE_OFFSET_BITS=64")
test_target_arch(mips64 "" "-mips64r2" "-mabi=64")
endif()
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "nvptx")
  # Probe for nvptx64 support. The triple has to be passed through
  # `--target=`, exactly like the amdgcn branch of this chain does; a bare
  # "--nvptx64-nvidia-cuda" is not a target selection, so the probe would not
  # be testing the NVPTX target at all.
  # The remaining flags mirror the ones used for the GPU builtins build; "-c"
  # is presumably there so the check compiles without attempting a link.
  test_target_arch(nvptx64 "" "--target=nvptx64-nvidia-cuda" "-nogpulib" "-flto"
                   "-fconvergent-functions" "-c")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "arm")
if(WIN32)
test_target_arch(arm "" "" "")
Expand Down
13 changes: 10 additions & 3 deletions compiler-rt/cmake/builtin-config-ix.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ builtin_check_c_compiler_flag(-fno-profile-generate COMPILER_RT_HAS_FNO_PROFILE_
builtin_check_c_compiler_flag(-fno-profile-instr-generate COMPILER_RT_HAS_FNO_PROFILE_INSTR_GENERATE_FLAG)
builtin_check_c_compiler_flag(-fno-profile-instr-use COMPILER_RT_HAS_FNO_PROFILE_INSTR_USE_FLAG)
builtin_check_c_compiler_flag(-Wno-pedantic COMPILER_RT_HAS_WNO_PEDANTIC)
builtin_check_c_compiler_flag(-nogpulib COMPILER_RT_HAS_NOGPULIB_FLAG)
builtin_check_c_compiler_flag(-flto COMPILER_RT_HAS_FLTO_FLAG)
builtin_check_c_compiler_flag(-fconvergent-functions COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG)
builtin_check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG)
builtin_check_c_compiler_flag(-Wbuiltin-declaration-mismatch COMPILER_RT_HAS_WBUILTIN_DECLARATION_MISMATCH_FLAG)
builtin_check_c_compiler_flag(/Zl COMPILER_RT_HAS_ZL_FLAG)

Expand Down Expand Up @@ -52,6 +56,7 @@ else()
set(OS_NAME "${CMAKE_SYSTEM_NAME}")
endif()

set(AMDGPU amdgcn)
set(ARM64 aarch64)
set(ARM32 arm armhf armv4t armv5te armv6 armv6m armv7m armv7em armv7 armv7s armv7k armv8m.base armv8m.main armv8.1m.main)
set(AVR avr)
Expand All @@ -61,6 +66,7 @@ set(X86_64 x86_64)
set(LOONGARCH64 loongarch64)
set(MIPS32 mips mipsel)
set(MIPS64 mips64 mips64el)
set(NVPTX nvptx64)
set(PPC32 powerpc powerpcspe)
set(PPC64 powerpc64 powerpc64le)
set(RISCV32 riscv32)
Expand All @@ -78,8 +84,8 @@ if(APPLE)
endif()

set(ALL_BUILTIN_SUPPORTED_ARCH
${X86} ${X86_64} ${ARM32} ${ARM64} ${AVR}
${HEXAGON} ${MIPS32} ${MIPS64} ${PPC32} ${PPC64}
${X86} ${X86_64} ${AMDGPU} ${ARM32} ${ARM64} ${AVR}
${HEXAGON} ${MIPS32} ${MIPS64} ${NVPTX} ${PPC32} ${PPC64}
${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9}
${WASM32} ${WASM64} ${VE} ${LOONGARCH64})

Expand Down Expand Up @@ -245,7 +251,8 @@ else()
${ALL_BUILTIN_SUPPORTED_ARCH})
endif()

if (OS_NAME MATCHES "Linux|SerenityOS" AND NOT LLVM_USE_SANITIZER)
if(OS_NAME MATCHES "Linux|SerenityOS" AND NOT LLVM_USE_SANITIZER AND NOT
COMPILER_RT_GPU_BUILD)
set(COMPILER_RT_HAS_CRT TRUE)
else()
set(COMPILER_RT_HAS_CRT FALSE)
Expand Down
18 changes: 18 additions & 0 deletions compiler-rt/cmake/caches/GPU.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# This file sets up a CMakeCache for GPU builds of compiler-rt. This supports
# amdgcn and nvptx builds targeting the builtins library.
#
# It only pre-populates cache entries, so it is meant to be loaded as an
# initial cache (for example `cmake -C <path>/GPU.cmake ...`). Every entry is a
# plain (non-FORCE) cache BOOL, so values already given by the user win.

# Tests and SafeStack are switched off for this configuration.
set(COMPILER_RT_INCLUDE_TESTS OFF CACHE BOOL "")
set(COMPILER_RT_HAS_SAFESTACK OFF CACHE BOOL "")

# The builtins library is the only component that is enabled, and it is
# configured as a baremetal build.
set(COMPILER_RT_BUILD_BUILTINS ON CACHE BOOL "")
set(COMPILER_RT_BAREMETAL_BUILD ON CACHE BOOL "")

# Every other compiler-rt component is disabled.
set(COMPILER_RT_BUILD_CRT OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_SANITIZERS OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_XRAY OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_LIBFUZZER OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_PROFILE OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_MEMPROF OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_XRAY_NO_PREINIT OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "")
# Spelled "STANDALONE"; a misspelled name would create an unused cache entry
# and leave the real option at its default.
set(COMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC OFF CACHE BOOL "")
21 changes: 20 additions & 1 deletion compiler-rt/lib/builtins/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ option(COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN
"Skip the atomic builtin (these should normally be provided by a shared library)"
On)

if(NOT FUCHSIA AND NOT COMPILER_RT_BAREMETAL_BUILD)
if(NOT FUCHSIA AND NOT COMPILER_RT_BAREMETAL_BUILD AND NOT COMPILER_RT_GPU_BUILD)
set(GENERIC_SOURCES
${GENERIC_SOURCES}
emutls.c
Expand Down Expand Up @@ -627,6 +627,8 @@ if (MINGW)
)
endif()

set(amdgcn_SOURCES ${GENERIC_SOURCES})

set(armv4t_SOURCES ${arm_min_SOURCES})
set(armv5te_SOURCES ${arm_min_SOURCES})
set(armv6_SOURCES ${arm_min_SOURCES})
Expand Down Expand Up @@ -706,6 +708,8 @@ set(mips64_SOURCES ${GENERIC_TF_SOURCES}
set(mips64el_SOURCES ${GENERIC_TF_SOURCES}
${mips_SOURCES})

set(nvptx64_SOURCES ${GENERIC_SOURCES})

set(powerpc_SOURCES ${GENERIC_SOURCES})

set(powerpcspe_SOURCES ${GENERIC_SOURCES})
Expand Down Expand Up @@ -811,6 +815,21 @@ else ()
endif()
endif()

# Extra compile flags for builds whose default target architecture is a GPU
# (amdgcn or nvptx). Each flag is guarded by its COMPILER_RT_HAS_* check
# variable, so append_list_if is expected to add it to BUILTIN_CFLAGS only when
# that check succeeded.
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn|nvptx")
  append_list_if(COMPILER_RT_HAS_FFREESTANDING_FLAG
                 -ffreestanding BUILTIN_CFLAGS)
  append_list_if(COMPILER_RT_HAS_NOGPULIB_FLAG
                 -nogpulib BUILTIN_CFLAGS)
  append_list_if(COMPILER_RT_HAS_FLTO_FLAG
                 -flto BUILTIN_CFLAGS)
  append_list_if(COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG
                 -fconvergent-functions BUILTIN_CFLAGS)
endif()

# amdgcn only: request the generic ABI. This runs after the common GPU flags
# above, so the resulting flag order is the same as with a nested check. The
# "SHELL:" prefix keeps the "-Xclang <arg>" pair together as one option group.
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
  append_list_if(COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG
                 "SHELL:-Xclang -mcode-object-version=none" BUILTIN_CFLAGS)
endif()

set(BUILTIN_DEFS "")

if(COMPILER_RT_BUILTINS_HIDE_SYMBOLS)
Expand Down
Loading