141 changes: 117 additions & 24 deletions libc/cmake/modules/LLVMLibCLibraryRules.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -50,31 +50,9 @@ function(collect_object_file_deps target result)
endif()
endfunction(collect_object_file_deps)

# A rule to build a library from a collection of entrypoint objects.
# Usage:
# add_entrypoint_library(
# DEPENDS <list of add_entrypoint_object targets>
# )
#
# NOTE: If one wants an entrypoint to be available in a library, then they will
# have to list the entrypoint target explicitly in the DEPENDS list. Implicit
# entrypoint dependencies will not be added to the library.
function(add_entrypoint_library target_name)
cmake_parse_arguments(
"ENTRYPOINT_LIBRARY"
"" # No optional arguments
"" # No single value arguments
"DEPENDS" # Multi-value arguments
${ARGN}
)
if(NOT ENTRYPOINT_LIBRARY_DEPENDS)
message(FATAL_ERROR "'add_entrypoint_library' target requires a DEPENDS list "
"of 'add_entrypoint_object' targets.")
endif()

get_fq_deps_list(fq_deps_list ${ENTRYPOINT_LIBRARY_DEPENDS})
function(get_all_object_file_deps result fq_deps_list)
set(all_deps "")
foreach(dep IN LISTS fq_deps_list)
foreach(dep ${fq_deps_list})
get_target_property(dep_type ${dep} "TARGET_TYPE")
if(NOT ((${dep_type} STREQUAL ${ENTRYPOINT_OBJ_TARGET_TYPE}) OR
(${dep_type} STREQUAL ${ENTRYPOINT_EXT_TARGET_TYPE}) OR
Expand Down Expand Up @@ -102,6 +80,121 @@ function(add_entrypoint_library target_name)
list(APPEND all_deps ${entrypoint_target})
endforeach(dep)
list(REMOVE_DUPLICATES all_deps)
set(${result} ${all_deps} PARENT_SCOPE)
endfunction()

# A rule to build a library from a collection of entrypoint objects and bundle
# it into a GPU fatbinary. Usage is the same as 'add_entrypoint_library'.
# Usage:
#     add_gpu_entrypoint_library(
#       <target_name>
#       DEPENDS <list of add_entrypoint_object targets>
#     )
#
# For every entrypoint object '<dep>' collected from DEPENDS (with '<name>'
# being the part of '<dep>' after its last '.'), this rule creates:
#   <dep>.__gpubin__ - custom target producing 'binary/<name>.gpubin' with
#                      'clang-offload-packager'.
#   <dep>.__stub__   - custom target producing the empty 'stubs/<name>.cpp'.
#   <dep>.__fatbin__ - object library that compiles the stub for
#                      LLVM_HOST_TRIPLE with the '.gpubin' blob embedded.
# '<target_name>' is a static library made of all the '__fatbin__' objects.
function(add_gpu_entrypoint_library target_name)
  cmake_parse_arguments(
    "ENTRYPOINT_LIBRARY"
    "" # No optional arguments
    "" # No single value arguments
    "DEPENDS" # Multi-value arguments
    ${ARGN}
  )
  if(NOT ENTRYPOINT_LIBRARY_DEPENDS)
    message(FATAL_ERROR "'add_gpu_entrypoint_library' target requires a DEPENDS list "
                        "of 'add_entrypoint_object' targets.")
  endif()

  get_fq_deps_list(fq_deps_list ${ENTRYPOINT_LIBRARY_DEPENDS})
  get_all_object_file_deps(all_deps "${fq_deps_list}")

  # The image description passed to the packager only depends on the target
  # architecture, so compute it once instead of once per entrypoint. Fail early
  # for any other architecture: an empty prefix would yield a malformed
  # packager command line that only breaks at build time.
  if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
    set(prefix --image=arch=generic,triple=nvptx64-nvidia-cuda,feature=+ptx63)
  elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
    set(prefix --image=arch=generic,triple=amdgcn-amd-amdhsa)
  else()
    message(FATAL_ERROR "'add_gpu_entrypoint_library' requires an NVPTX or "
                        "AMDGPU target architecture.")
  endif()

  # The GPU 'libc' needs to be exported in a format that can be linked with
  # offloading languages like OpenMP or CUDA. This wraps every GPU object into a
  # fat binary and adds them to a static library.
  set(objects "")
  foreach(dep IN LISTS all_deps)
    # Expands to the object files of 'dep' only if that target exists.
    set(object $<$<STREQUAL:$<TARGET_NAME_IF_EXISTS:${dep}>,${dep}>:$<TARGET_OBJECTS:${dep}>>)

    # Extract the short entrypoint name, i.e. everything after the last '.'.
    # If there is no '.', FIND yields -1 and the whole string is used.
    string(FIND "${dep}" "." last_dot_loc REVERSE)
    math(EXPR name_loc "${last_dot_loc} + 1")
    string(SUBSTRING "${dep}" ${name_loc} -1 name)

    set(gpubin_file "${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin")
    set(stub_file "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp")

    # Use the 'clang-offload-packager' to merge these files into a binary blob.
    add_custom_command(
      OUTPUT "${gpubin_file}"
      COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/binary
      COMMAND ${LIBC_CLANG_OFFLOAD_PACKAGER}
              "${prefix},file=$<JOIN:${object},,file=>" -o
              ${gpubin_file}
      DEPENDS ${dep}
      COMMENT "Packaging LLVM offloading binary for '${object}'"
      VERBATIM
    )
    add_custom_target(${dep}.__gpubin__ DEPENDS ${dep} "${gpubin_file}")

    # CMake does not permit setting the name on object files. In order to have
    # human readable names we create an empty stub file with the entrypoint
    # name. This empty file will then have the created binary blob embedded.
    add_custom_command(
      OUTPUT "${stub_file}"
      COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/stubs
      COMMAND ${CMAKE_COMMAND} -E touch ${stub_file}
      DEPENDS ${dep} ${dep}.__gpubin__
      VERBATIM
    )
    add_custom_target(${dep}.__stub__
                      DEPENDS ${dep}.__gpubin__ "${stub_file}")

    add_library(${dep}.__fatbin__
      EXCLUDE_FROM_ALL OBJECT
      "${stub_file}"
    )

    # This is always compiled for the LLVM host triple instead of the native GPU
    # triple that is used by default in the build.
    target_compile_options(${dep}.__fatbin__ BEFORE PRIVATE -nostdlib)
    target_compile_options(${dep}.__fatbin__ PRIVATE
      --target=${LLVM_HOST_TRIPLE}
      "SHELL:-Xclang -fembed-offload-object=${gpubin_file}")
    add_dependencies(${dep}.__fatbin__ ${dep} ${dep}.__stub__ ${dep}.__gpubin__)

    # Set the list of newly created fat binaries containing embedded device code.
    list(APPEND objects $<TARGET_OBJECTS:${dep}.__fatbin__>)
  endforeach()

  add_library(
    ${target_name}
    STATIC
      ${objects}
  )
  # A STATIC library is an archive artifact, so its location is controlled by
  # ARCHIVE_OUTPUT_DIRECTORY; LIBRARY_OUTPUT_DIRECTORY has no effect on it.
  set_target_properties(${target_name} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY ${LIBC_LIBRARY_DIR})
endfunction()

# A rule to build a library from a collection of entrypoint objects.
# Usage:
# add_entrypoint_library(
# DEPENDS <list of add_entrypoint_object targets>
# )
#
# NOTE: If one wants an entrypoint to be available in a library, then they will
# have to list the entrypoint target explicitly in the DEPENDS list. Implicit
# entrypoint dependencies will not be added to the library.
function(add_entrypoint_library target_name)
cmake_parse_arguments(
"ENTRYPOINT_LIBRARY"
"" # No optional arguments
"" # No single value arguments
"DEPENDS" # Multi-value arguments
${ARGN}
)
if(NOT ENTRYPOINT_LIBRARY_DEPENDS)
message(FATAL_ERROR "'add_entrypoint_library' target requires a DEPENDS list "
"of 'add_entrypoint_object' targets.")
endif()

get_fq_deps_list(fq_deps_list ${ENTRYPOINT_LIBRARY_DEPENDS})
get_all_object_file_deps(all_deps "${fq_deps_list}")

set(objects "")
foreach(dep IN LISTS all_deps)
list(APPEND objects $<$<STREQUAL:$<TARGET_NAME_IF_EXISTS:${dep}>,${dep}>:$<TARGET_OBJECTS:${dep}>>)
Expand Down
348 changes: 78 additions & 270 deletions libc/cmake/modules/LLVMLibCObjectRules.cmake

Large diffs are not rendered by default.

47 changes: 34 additions & 13 deletions libc/cmake/modules/LLVMLibCTestRules.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ function(add_integration_test test_name)
${fq_build_target_name}
EXCLUDE_FROM_ALL
# The NVIDIA 'nvlink' linker does not currently support static libraries.
$<$<BOOL:${LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX}>:${link_object_files}>
$<$<BOOL:${LIBC_TARGET_ARCHITECTURE_IS_NVPTX}>:${link_object_files}>
${INTEGRATION_TEST_SRCS}
${INTEGRATION_TEST_HDRS}
)
Expand All @@ -461,8 +461,17 @@ function(add_integration_test test_name)
_get_hermetic_test_compile_options(compile_options "${INTEGRATION_TEST_COMPILE_OPTIONS}")
target_compile_options(${fq_build_target_name} PRIVATE ${compile_options})

if(LIBC_TARGET_ARCHITECTURE_IS_GPU)
target_link_options(${fq_build_target_name} PRIVATE -nostdlib -static)
if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
target_link_options(${fq_build_target_name} PRIVATE
-mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto
"-Wl,-mllvm,-amdgpu-lower-global-ctor-dtor=0" -nostdlib -static
"-Wl,-mllvm,-amdhsa-code-object-version=${LIBC_GPU_CODE_OBJECT_VERSION}")
elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
# We need to use the internal object versions for NVPTX.
set(internal_suffix ".__internal__")
target_link_options(${fq_build_target_name} PRIVATE
-march=${LIBC_GPU_TARGET_ARCHITECTURE} -nostdlib -static
"--cuda-path=${LIBC_CUDA_ROOT}")
elseif(LIBC_CC_SUPPORTS_NOSTDLIBPP)
target_link_options(${fq_build_target_name} PRIVATE -nolibc -nostartfiles -nostdlib++ -static)
else()
Expand All @@ -474,9 +483,10 @@ function(add_integration_test test_name)
target_link_libraries(
${fq_build_target_name}
# The NVIDIA 'nvlink' linker does not currently support static libraries.
$<$<NOT:$<BOOL:${LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX}>>:${fq_target_name}.__libc__>
libc.startup.${LIBC_TARGET_OS}.crt1
libc.test.IntegrationTest.test)
$<$<NOT:$<BOOL:${LIBC_TARGET_ARCHITECTURE_IS_NVPTX}>>:${fq_target_name}.__libc__>
libc.startup.${LIBC_TARGET_OS}.crt1${internal_suffix}
libc.test.IntegrationTest.test${internal_suffix}
)
add_dependencies(${fq_build_target_name}
libc.test.IntegrationTest.test
${INTEGRATION_TEST_DEPENDS})
Expand All @@ -495,7 +505,7 @@ function(add_integration_test test_name)
# makes `add_custom_target` construct the correct command and execute it.
set(test_cmd
${INTEGRATION_TEST_ENV}
$<$<BOOL:${LIBC_TARGET_ARCHITECTURE_IS_GPU}>:${gpu_loader_exe}>
$<$<BOOL:${LIBC_TARGET_OS_IS_GPU}>:${gpu_loader_exe}>
${CMAKE_CROSSCOMPILING_EMULATOR}
${INTEGRATION_TEST_LOADER_ARGS}
$<TARGET_FILE:${fq_build_target_name}> ${INTEGRATION_TEST_ARGS})
Expand Down Expand Up @@ -606,7 +616,7 @@ function(add_libc_hermetic_test test_name)
${fq_build_target_name}
EXCLUDE_FROM_ALL
# The NVIDIA 'nvlink' linker does not currently support static libraries.
$<$<BOOL:${LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX}>:${link_object_files}>
$<$<BOOL:${LIBC_TARGET_ARCHITECTURE_IS_NVPTX}>:${link_object_files}>
${HERMETIC_TEST_SRCS}
${HERMETIC_TEST_HDRS}
)
Expand All @@ -615,6 +625,8 @@ function(add_libc_hermetic_test test_name)
RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
#OUTPUT_NAME ${fq_target_name}
)

_get_hermetic_test_compile_options(compile_options "${HERMETIC_TEST_COMPILE_OPTIONS}")
target_include_directories(${fq_build_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR})
target_include_directories(${fq_build_target_name} PRIVATE ${LIBC_SOURCE_DIR})
_get_hermetic_test_compile_options(compile_options "${HERMETIC_TEST_COMPILE_OPTIONS}")
Expand All @@ -629,8 +641,17 @@ function(add_libc_hermetic_test test_name)
endif()
endforeach()

if(LIBC_TARGET_ARCHITECTURE_IS_GPU)
target_link_options(${fq_build_target_name} PRIVATE -nostdlib -static)
if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
target_link_options(${fq_build_target_name} PRIVATE
-mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto
"-Wl,-mllvm,-amdgpu-lower-global-ctor-dtor=0" -nostdlib -static
"-Wl,-mllvm,-amdhsa-code-object-version=${LIBC_GPU_CODE_OBJECT_VERSION}")
elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
# We need to use the internal object versions for NVPTX.
set(internal_suffix ".__internal__")
target_link_options(${fq_build_target_name} PRIVATE
-march=${LIBC_GPU_TARGET_ARCHITECTURE} -nostdlib -static
"--cuda-path=${LIBC_CUDA_ROOT}")
elseif(LIBC_CC_SUPPORTS_NOSTDLIBPP)
target_link_options(${fq_build_target_name} PRIVATE -nolibc -nostartfiles -nostdlib++ -static)
else()
Expand All @@ -642,12 +663,12 @@ function(add_libc_hermetic_test test_name)
target_link_libraries(
${fq_build_target_name}
PRIVATE
libc.startup.${LIBC_TARGET_OS}.crt1
libc.startup.${LIBC_TARGET_OS}.crt1${internal_suffix}
${link_libraries}
LibcTest.hermetic
LibcHermeticTestSupport.hermetic
# The NVIDIA 'nvlink' linker does not currently support static libraries.
$<$<NOT:$<BOOL:${LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX}>>:${fq_target_name}.__libc__>)
$<$<NOT:$<BOOL:${LIBC_TARGET_ARCHITECTURE_IS_NVPTX}>>:${fq_target_name}.__libc__>)
add_dependencies(${fq_build_target_name}
LibcTest.hermetic
libc.test.UnitTest.ErrnoSetterMatcher
Expand All @@ -660,7 +681,7 @@ function(add_libc_hermetic_test test_name)
endif()

set(test_cmd ${HERMETIC_TEST_ENV}
$<$<BOOL:${LIBC_TARGET_ARCHITECTURE_IS_GPU}>:${gpu_loader_exe}> ${CMAKE_CROSSCOMPILING_EMULATOR} ${HERMETIC_TEST_LOADER_ARGS}
$<$<BOOL:${LIBC_TARGET_OS_IS_GPU}>:${gpu_loader_exe}> ${CMAKE_CROSSCOMPILING_EMULATOR} ${HERMETIC_TEST_LOADER_ARGS}
$<TARGET_FILE:${fq_build_target_name}> ${HERMETIC_TEST_ARGS})
add_custom_target(
${fq_target_name}
Expand Down
108 changes: 32 additions & 76 deletions libc/cmake/modules/prepare_libc_gpu_build.cmake
Original file line number Diff line number Diff line change
@@ -1,23 +1,8 @@
if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU)
if(NOT LIBC_TARGET_OS_IS_GPU)
message(FATAL_ERROR
"libc build: Invalid attempt to set up GPU architectures.")
endif()

# Set up the target architectures to build the GPU libc for.
set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
"gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942"
"gfx1010;gfx1030;gfx1031;gfx1032;gfx1033;gfx1034"
"gfx1035;gfx1036"
"gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151")
set(all_nvptx_architectures "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
"sm_70;sm_72;sm_75;sm_80;sm_86;sm_89;sm_90")
set(all_gpu_architectures
"${all_amdgpu_architectures};${all_nvptx_architectures}")
set(LIBC_GPU_ARCHITECTURES "all" CACHE STRING
"List of GPU architectures to build the libc for.")
set(AMDGPU_TARGET_TRIPLE "amdgcn-amd-amdhsa")
set(NVPTX_TARGET_TRIPLE "nvptx64-nvidia-cuda")

# Ensure the compiler is a valid clang when building the GPU target.
set(req_ver "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}")
if(NOT (CMAKE_CXX_COMPILER_ID MATCHES "[Cc]lang" AND
Expand All @@ -31,40 +16,6 @@ if(NOT LLVM_LIBC_FULL_BUILD)
"GPU.")
endif()

# Identify any locally installed AMD GPUs on the system using 'amdgpu-arch'.
find_program(LIBC_AMDGPU_ARCH
NAMES amdgpu-arch NO_DEFAULT_PATH
PATHS ${LLVM_BINARY_DIR}/bin /opt/rocm/llvm/bin/)

# Identify any locally installed NVIDIA GPUs on the system using 'nvptx-arch'.
find_program(LIBC_NVPTX_ARCH
NAMES nvptx-arch NO_DEFAULT_PATH
PATHS ${LLVM_BINARY_DIR}/bin)

# Get the list of all natively supported GPU architectures.
set(detected_gpu_architectures "")
foreach(arch_tool ${LIBC_NVPTX_ARCH} ${LIBC_AMDGPU_ARCH})
if(arch_tool)
execute_process(COMMAND ${arch_tool}
OUTPUT_VARIABLE arch_tool_output
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
string(REPLACE "\n" ";" arch_list "${arch_tool_output}")
list(APPEND detected_gpu_architectures "${arch_list}")
endif()
endforeach()
list(REMOVE_DUPLICATES detected_gpu_architectures)

if(LIBC_GPU_ARCHITECTURES STREQUAL "all")
set(LIBC_GPU_ARCHITECTURES ${all_gpu_architectures})
elseif(LIBC_GPU_ARCHITECTURES STREQUAL "native")
if(NOT detected_gpu_architectures)
message(FATAL_ERROR "No GPUs found on the system when using 'native'")
endif()
set(LIBC_GPU_ARCHITECTURES ${detected_gpu_architectures})
endif()
message(STATUS "Building libc for the following GPU architecture(s): "
"${LIBC_GPU_ARCHITECTURES}")

# Identify the program used to package multiple images into a single binary.
find_program(LIBC_CLANG_OFFLOAD_PACKAGER
NAMES clang-offload-packager NO_DEFAULT_PATH
Expand All @@ -87,49 +38,54 @@ else()
endif()

set(LIBC_GPU_TEST_ARCHITECTURE "" CACHE STRING "Architecture for the GPU tests")
if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
check_cxx_compiler_flag("-nogpulib -mcpu=native" PLATFORM_HAS_GPU)
elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
# Identify any locally installed NVIDIA GPUs on the system using 'nvptx-arch'.
# Using 'check_cxx_compiler_flag' does not work currently due to the link job.
find_program(LIBC_NVPTX_ARCH
NAMES nvptx-arch NO_DEFAULT_PATH
PATHS ${LLVM_BINARY_DIR}/bin)
if(LIBC_NVPTX_ARCH)
execute_process(COMMAND ${LIBC_NVPTX_ARCH}
OUTPUT_VARIABLE arch_tool_output
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if(arch_tool_output MATCHES "^sm_[0-9]+")
set(PLATFORM_HAS_GPU TRUE)
endif()
endif()
endif()

set(gpu_test_architecture "")
if(LIBC_GPU_TEST_ARCHITECTURE)
set(LIBC_GPU_TESTS_DISABLED FALSE)
set(gpu_test_architecture ${LIBC_GPU_TEST_ARCHITECTURE})
message(STATUS "Using user-specified GPU architecture for testing: "
"'${gpu_test_architecture}'")
elseif(detected_gpu_architectures)
list(GET detected_gpu_architectures 0 gpu_test_architecture)
elseif(PLATFORM_HAS_GPU)
set(LIBC_GPU_TESTS_DISABLED FALSE)
set(gpu_test_architecture "native")
message(STATUS "Using GPU architecture detected on the system for testing: "
"'${gpu_test_architecture}'")
"'native'")
else()
list(LENGTH LIBC_GPU_ARCHITECTURES n_gpu_archs)
if (${n_gpu_archs} EQUAL 1)
set(gpu_test_architecture ${LIBC_GPU_ARCHITECTURES})
message(STATUS "Using user-specified GPU architecture for testing: "
"'${gpu_test_architecture}'")
else()
message(STATUS "No GPU architecture set for testing. GPU tests will not be "
"availibe. Set 'LIBC_GPU_TEST_ARCHITECTURE' to override.")
return()
endif()
set(LIBC_GPU_TESTS_DISABLED TRUE)
message(STATUS "No GPU architecture detected or provided, tests will not be "
"built")
endif()
set(LIBC_GPU_TARGET_ARCHITECTURE "${gpu_test_architecture}")

if("${gpu_test_architecture}" IN_LIST all_amdgpu_architectures)
set(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU TRUE)
set(LIBC_GPU_TARGET_TRIPLE ${AMDGPU_TARGET_TRIPLE})
set(LIBC_GPU_TARGET_ARCHITECTURE "${gpu_test_architecture}")
elseif("${gpu_test_architecture}" IN_LIST all_nvptx_architectures)
set(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX TRUE)
set(LIBC_GPU_TARGET_TRIPLE ${NVPTX_TARGET_TRIPLE})
set(LIBC_GPU_TARGET_ARCHITECTURE "${gpu_test_architecture}")
else()
message(FATAL_ERROR "Unknown GPU architecture '${gpu_test_architecture}'")
endif()
if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
# FIXME: This is a hack required to keep the CUDA package from trying to find
# pthreads. We only link the CUDA driver, so this is unneeded.
add_library(CUDA::cudart_static_deps IMPORTED INTERFACE)

if(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
find_package(CUDAToolkit QUIET)
if(CUDAToolkit_FOUND)
get_filename_component(LIBC_CUDA_ROOT "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
endif()
endif()

if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU)
if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
# The AMDGPU environment uses different code objects to encode the ABI for
# kernel calls and intrinsic functions. We want to specify this manually to
# conform to whatever the test suite was built to handle.
Expand Down
33 changes: 17 additions & 16 deletions libc/docs/gpu/using.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,39 +14,40 @@ Building the GPU library

LLVM's libc GPU support *must* be built with an up-to-date ``clang`` compiler
due to heavy reliance on ``clang``'s GPU support. This can be done automatically
using the ``LLVM_ENABLE_RUNTIMES=libc`` option. To enable libc for the GPU,
enable the ``LIBC_GPU_BUILD`` option. By default, ``libcgpu.a`` will be built
using every supported GPU architecture. To restrict the number of architectures
build, either set ``LIBC_GPU_ARCHITECTURES`` to the list of desired
architectures manually or use ``native`` to detect the GPUs on your system. A
typical ``cmake`` configuration will look like this:
using the LLVM runtimes support. The GPU build is done using cross-compilation
to the GPU architecture. This project currently supports AMD and NVIDIA GPUs,
which can be targeted using the appropriate target name. The following
invocation will enable a cross-compiling build for the GPU architectures and
enable the ``libc`` project only for those targets.

.. code-block:: sh
$> cd llvm-project # The llvm-project checkout
$> mkdir build
$> cd build
$> cmake ../llvm -G Ninja \
-DLLVM_ENABLE_PROJECTS="clang;lld;compiler-rt" \
-DLLVM_ENABLE_RUNTIMES="libc;openmp" \
$> cmake ../llvm -G Ninja \
-DLLVM_ENABLE_PROJECTS="clang;lld;compiler-rt" \
-DLLVM_ENABLE_RUNTIMES="openmp" \
-DCMAKE_BUILD_TYPE=<Debug|Release> \ # Select build type
-DLIBC_GPU_BUILD=ON \ # Build in GPU mode
-DLIBC_GPU_ARCHITECTURES=all \ # Build all supported architectures
-DCMAKE_INSTALL_PREFIX=<PATH> \ # Where 'libcgpu.a' will live
-DCMAKE_INSTALL_PREFIX=<PATH> \ # Where 'libcgpu.a' will live
-DRUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES=libc \
-DRUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES=libc \
-DLLVM_RUNTIME_TARGETS=default;amdgcn-amd-amdhsa;nvptx64-nvidia-cuda
$> ninja install
Since we want to include ``clang``, ``lld`` and ``compiler-rt`` in our
toolchain, we list them in ``LLVM_ENABLE_PROJECTS``. To ensure ``libc`` is built
using a compatible compiler and to support ``openmp`` offloading, we list them
in ``LLVM_ENABLE_RUNTIMES`` to build them after the enabled projects using the
newly built compiler. ``CMAKE_INSTALL_PREFIX`` specifies the installation
directory in which to install the ``libcgpu.a`` library and headers along with
LLVM. The generated headers will be placed in ``include/gpu-none-llvm``.
directory in which to install the ``libcgpu-nvptx.a`` and ``libcgpu-amdgpu.a``
libraries and headers along with LLVM. The generated headers will be placed in
``include/<gpu-triple>``.

Usage
=====

Once the ``libcgpu.a`` static archive has been built it can be linked directly
Once the static archive has been built it can be linked directly
with offloading applications as a standard library. This process is described in
the `clang documentation <https://clang.llvm.org/docs/OffloadingDesign.html>`_.
This linking mode is used by the OpenMP toolchain, but is currently opt-in for
Expand All @@ -68,7 +69,7 @@ supported target device. The supported architectures can be seen using LLVM's
OFFLOADING IMAGE [0]:
kind llvm ir
arch gfx90a
arch generic
triple amdgcn-amd-amdhsa
producer none
Expand Down
6 changes: 3 additions & 3 deletions libc/include/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ set(LIBC_INCLUDE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
include(LLVMLibCHeaderRules)

# The GPU build wants to install files in the compiler's resource directory.
if(LIBC_TARGET_ARCHITECTURE_IS_GPU)
if(LIBC_TARGET_OS_IS_GPU)
include(GetClangResourceDir)
endif()

Expand Down Expand Up @@ -586,7 +586,7 @@ add_gen_header(
.llvm-libc-types.wchar_t
)

if(LIBC_TARGET_ARCHITECTURE_IS_GPU)
if(LIBC_TARGET_OS_IS_GPU)
file(MAKE_DIRECTORY ${LIBC_INCLUDE_DIR}/gpu)

add_gen_header(
Expand Down Expand Up @@ -638,7 +638,7 @@ foreach(target IN LISTS all_install_header_targets)
# The GPU optionally provides the supported declarations externally so
# offloading languages like CUDA and OpenMP know what is supported by libc. We
# install these in the compiler's resource directory at a preset location.
if(LIBC_TARGET_ARCHITECTURE_IS_GPU AND PACKAGE_VERSION)
if(LIBC_TARGET_OS_IS_GPU AND PACKAGE_VERSION)
get_target_property(decls_file ${target} DECLS_FILE_PATH)
if(NOT decls_file)
continue()
Expand Down
35 changes: 30 additions & 5 deletions libc/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,7 @@ set(libc_archive_targets "")
set(libc_archive_names "")
set(libc_archive_entrypoint_lists "")
if(LLVM_LIBC_FULL_BUILD)
if(LIBC_TARGET_ARCHITECTURE_IS_GPU)
list(APPEND libc_archive_names cgpu mgpu)
else()
list(APPEND libc_archive_names c m)
endif()
list(APPEND libc_archive_names c m)
list(APPEND libc_archive_targets libc libm)
list(APPEND libc_archive_entrypoint_lists
TARGET_LIBC_ENTRYPOINTS TARGET_LIBM_ENTRYPOINTS)
Expand Down Expand Up @@ -40,6 +36,27 @@ foreach(archive IN ZIP_LISTS
endif()
endif()
list(APPEND added_archive_targets ${archive_1})

# Add the offloading version of the library for offloading languages. These
# are installed in the standard search path separate from the other libraries.
if(LIBC_TARGET_OS_IS_GPU)
set(libc_gpu_archive_target ${archive_1}gpu)
set(libc_gpu_archive_name ${archive_0}gpu-${LIBC_TARGET_ARCHITECTURE})

add_gpu_entrypoint_library(
${libc_gpu_archive_target}
DEPENDS
${${archive_2}}
)
set_target_properties(
${libc_gpu_archive_target}
PROPERTIES
ARCHIVE_OUTPUT_NAME ${libc_gpu_archive_name}
)
set_target_properties(${libc_gpu_archive_target} PROPERTIES
ARCHIVE_OUTPUT_DIRECTORY ${LLVM_LIBRARY_OUTPUT_INTDIR})
list(APPEND added_gpu_archive_targets ${libc_gpu_archive_target})
endif()
endforeach()

install(
Expand All @@ -48,6 +65,14 @@ install(
COMPONENT libc
)

if(LIBC_TARGET_OS_IS_GPU)
install(
TARGETS ${added_gpu_archive_targets}
ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX}
COMPONENT libc
)
endif()

if(NOT LIBC_TARGET_OS_IS_BAREMETAL)
# For now we will disable libc-startup installation for baremetal. The
# correct way to do it would be to make a hookable startup for baremetal
Expand Down
2 changes: 1 addition & 1 deletion libc/src/__support/File/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
if(NOT (TARGET libc.src.__support.threads.mutex)
OR LIBC_TARGET_ARCHITECTURE_IS_GPU)
OR LIBC_TARGET_OS_IS_GPU)
# Not all platforms have a mutex implementation. If mutex is unavailable,
# we just skip everything about files.
return()
Expand Down
2 changes: 1 addition & 1 deletion libc/src/__support/GPU/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU)
if(NOT LIBC_TARGET_OS_IS_GPU)
return()
endif()

Expand Down
2 changes: 1 addition & 1 deletion libc/src/__support/OSUtil/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ if(NOT TARGET ${target_os_util})
endif()

# The OSUtil is an object library in GPU mode.
if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU)
if(NOT LIBC_TARGET_OS_IS_GPU)
add_header_library(
osutil
HDRS
Expand Down
2 changes: 1 addition & 1 deletion libc/src/__support/RPC/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU)
if(NOT LIBC_TARGET_OS_IS_GPU)
return()
endif()

Expand Down
16 changes: 14 additions & 2 deletions libc/src/math/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
add_subdirectory(generic)
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_ARCHITECTURE})
add_subdirectory(${LIBC_TARGET_ARCHITECTURE})
elseif(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS})
# TODO: We should split this into 'nvptx' and 'amdgpu' for the GPU build.
add_subdirectory(${LIBC_TARGET_OS})
endif()

function(add_math_entrypoint_object name)
# We prefer machine specific implementation if available. Hence we check
# that first and return early if we are able to add an alias target for the
# machine specific implementation.
get_fq_target_name("${LIBC_TARGET_ARCHITECTURE}.${name}" fq_machine_specific_target_name)
get_fq_target_name("${LIBC_TARGET_OS}.${name}" fq_os_specific_target_name)
if(TARGET ${fq_machine_specific_target_name})
add_entrypoint_object(
${name}
Expand All @@ -16,17 +20,25 @@ function(add_math_entrypoint_object name)
.${LIBC_TARGET_ARCHITECTURE}.${name}
)
return()
elseif(TARGET ${fq_os_specific_target_name})
add_entrypoint_object(
${name}
ALIAS
DEPENDS
.${LIBC_TARGET_OS}.${name}
)
return()
endif()

# The GPU optionally depends on vendor libraries. If we emitted one of these
# entrypoints it means the user requested it and we should use it instead.
get_fq_target_name("${LIBC_TARGET_ARCHITECTURE}.vendor.${name}" fq_vendor_specific_target_name)
get_fq_target_name("${LIBC_TARGET_OS}.vendor.${name}" fq_vendor_specific_target_name)
if(TARGET ${fq_vendor_specific_target_name})
add_entrypoint_object(
${name}
ALIAS
DEPENDS
.${LIBC_TARGET_ARCHITECTURE}.vendor.${name}
.${LIBC_TARGET_OS}.vendor.${name}
VENDOR
)
return()
Expand Down
1 change: 0 additions & 1 deletion libc/src/math/gpu/vendor/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ else()
"functions will be an external reference to the vendor libraries.")
endif()

find_package(CUDAToolkit QUIET)
if(CUDAToolkit_FOUND)
set(libdevice_path ${CUDAToolkit_BIN_DIR}/../nvvm/libdevice/libdevice.10.bc)
if (EXISTS ${libdevice_path})
Expand Down
2 changes: 1 addition & 1 deletion libc/src/stdio/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS})
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS})
endif()

if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU)
if(NOT LIBC_TARGET_OS_IS_GPU)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/generic)
endif()

Expand Down
4 changes: 2 additions & 2 deletions libc/src/stdlib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ if(LLVM_LIBC_INCLUDE_SCUDO)
DEPENDS
${SCUDO_DEPS}
)
elseif(LIBC_TARGET_ARCHITECTURE_IS_GPU)
elseif(LIBC_TARGET_OS_IS_GPU)
add_entrypoint_external(
calloc
)
Expand Down Expand Up @@ -397,7 +397,7 @@ add_entrypoint_object(
.${LIBC_TARGET_OS}.abort
)

if(LIBC_TARGET_ARCHITECTURE_IS_GPU)
if(LIBC_TARGET_OS_IS_GPU)
add_entrypoint_object(
malloc
ALIAS
Expand Down
12 changes: 6 additions & 6 deletions libc/src/string/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,7 @@ if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
add_bcmp(bcmp_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512BW)
add_bcmp(bcmp_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
add_bcmp(bcmp)
elseif(LIBC_TARGET_ARCHITECTURE_IS_GPU)
elseif(LIBC_TARGET_OS_IS_GPU)
add_bcmp(bcmp)
else()
add_bcmp(bcmp_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
Expand Down Expand Up @@ -530,7 +530,7 @@ if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
add_bzero(bzero_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F)
add_bzero(bzero_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
add_bzero(bzero)
elseif(LIBC_TARGET_ARCHITECTURE_IS_GPU)
elseif(LIBC_TARGET_OS_IS_GPU)
add_bzero(bzero)
else()
add_bzero(bzero_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
Expand Down Expand Up @@ -562,7 +562,7 @@ if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
add_memcmp(memcmp_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
add_memcmp(memcmp)
elseif(LIBC_TARGET_ARCHITECTURE_IS_GPU)
elseif(LIBC_TARGET_OS_IS_GPU)
add_memcmp(memcmp)
else()
add_memcmp(memcmp_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
Expand Down Expand Up @@ -598,7 +598,7 @@ elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
add_memcpy(memcpy_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}
MLLVM_COMPILE_OPTIONS "-tail-merge-threshold=0")
add_memcpy(memcpy MLLVM_COMPILE_OPTIONS "-tail-merge-threshold=0")
elseif(LIBC_TARGET_ARCHITECTURE_IS_GPU)
elseif(LIBC_TARGET_OS_IS_GPU)
add_memcpy(memcpy)
else()
add_memcpy(memcpy_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
Expand Down Expand Up @@ -632,7 +632,7 @@ elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
add_memmove(memmove_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}
MLLVM_COMPILE_OPTIONS "-tail-merge-threshold=0")
add_memmove(memmove MLLVM_COMPILE_OPTIONS "-tail-merge-threshold=0")
elseif(LIBC_TARGET_ARCHITECTURE_IS_GPU)
elseif(LIBC_TARGET_OS_IS_GPU)
add_memmove(memmove)
else()
add_memmove(memmove_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
Expand Down Expand Up @@ -667,7 +667,7 @@ elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}
MLLVM_COMPILE_OPTIONS "-tail-merge-threshold=0")
add_memset(memset MLLVM_COMPILE_OPTIONS "-tail-merge-threshold=0")
elseif(LIBC_TARGET_ARCHITECTURE_IS_GPU)
elseif(LIBC_TARGET_OS_IS_GPU)
add_memset(memset)
else()
add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
Expand Down
35 changes: 13 additions & 22 deletions libc/startup/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,33 +28,24 @@ function(add_startup_object name)
)
endfunction()

if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU)
add_subdirectory(amdgpu)

add_startup_object(
crt1
ALIAS
DEPENDS
.amdgpu.crt1
)
elseif(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
add_subdirectory(nvptx)

add_startup_object(
crt1
ALIAS
DEPENDS
.nvptx.crt1
)
else()
# Skip building the startup code if there are no supported GPUs.
message(STATUS "Skipping startup for gpu target, no GPUs were detected")
return()
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_ARCHITECTURE})
add_subdirectory(${LIBC_TARGET_ARCHITECTURE})
endif()

add_startup_object(
crt1
ALIAS
DEPENDS
.${LIBC_TARGET_ARCHITECTURE}.crt1
)

add_custom_target(libc-startup)
set(startup_components crt1)
foreach(target IN LISTS startup_components)
set(fq_target_name libc.startup.gpu.${target})
add_dependencies(libc-startup ${fq_target_name})
install(FILES $<TARGET_OBJECTS:${fq_target_name}>
DESTINATION ${LIBC_INSTALL_LIBRARY_DIR}
RENAME $<TARGET_PROPERTY:${fq_target_name},OUTPUT_NAME>
COMPONENT libc)
endforeach()
13 changes: 0 additions & 13 deletions libc/startup/gpu/amdgpu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
add_startup_object(
crt1
NO_GPU_BUNDLE # Compile this file directly without special GPU handling.
SRC
start.cpp
DEPENDS
Expand All @@ -11,17 +10,5 @@ add_startup_object(
COMPILE_OPTIONS
-ffreestanding # To avoid compiler warnings about calling the main function.
-fno-builtin
-mcode-object-version=${LIBC_GPU_CODE_OBJECT_VERSION} # Manually set the ABI.
)
get_fq_target_name(crt1 fq_name)

# Ensure that clang uses the correct linker for this object type.
target_link_libraries(
${fq_name}
PUBLIC
"-mcpu=${LIBC_GPU_TARGET_ARCHITECTURE}"
"--target=${LIBC_GPU_TARGET_TRIPLE}"
"-flto"
"-Wl,-mllvm,-amdgpu-lower-global-ctor-dtor=0"
"-Wl,-mllvm,-amdhsa-code-object-version=${LIBC_GPU_CODE_OBJECT_VERSION}"
)
9 changes: 0 additions & 9 deletions libc/startup/gpu/nvptx/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
add_startup_object(
crt1
NO_GPU_BUNDLE # Compile this file directly without special GPU handling.
SRC
start.cpp
DEPENDS
Expand All @@ -13,11 +12,3 @@ add_startup_object(
-fno-builtin
)
get_fq_target_name(crt1 fq_name)

# Ensure that clang uses the correct linker for this object type.
target_link_libraries(${fq_name}
PUBLIC
"-march=${LIBC_GPU_TARGET_ARCHITECTURE}"
"--target=${LIBC_GPU_TARGET_TRIPLE}"
"--cuda-path=${LIBC_CUDA_ROOT}"
)
6 changes: 3 additions & 3 deletions libc/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ add_custom_target(libc-long-running-tests)

add_subdirectory(UnitTest)

if(LIBC_TARGET_ARCHITECTURE_IS_GPU AND
(NOT TARGET libc.utils.gpu.loader OR NOT TARGET libc.startup.gpu.crt1))
message(WARNING "Cannot build libc GPU tests, missing loader implementation")
if(LIBC_TARGET_OS_IS_GPU AND
(NOT TARGET libc.utils.gpu.loader OR LIBC_GPU_TESTS_DISABLED))
message(WARNING "Cannot build libc GPU tests, missing loader or architecture")
return()
endif()

Expand Down
16 changes: 0 additions & 16 deletions libc/test/IntegrationTest/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,21 +1,5 @@
if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU)
set(TEST_COMPILE_FLAGS
-mcpu=${LIBC_GPU_TARGET_ARCHITECTURE}
-emit-llvm # AMDGPU's intermediate object file format is bitcode.
--target=${LIBC_GPU_TARGET_TRIPLE}
-mcode-object-version=${LIBC_GPU_CODE_OBJECT_VERSION} # Manually set the ABI.
)
elseif(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
set(TEST_COMPILE_FLAGS
-march=${LIBC_GPU_TARGET_ARCHITECTURE}
--target=${LIBC_GPU_TARGET_TRIPLE}
--cuda-path=${LIBC_CUDA_ROOT}
)
endif()

add_object_library(
test
NO_GPU_BUNDLE # Compile this file directly without special GPU handling.
SRCS
test.cpp
COMPILE_OPTIONS
Expand Down
2 changes: 1 addition & 1 deletion libc/test/UnitTest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ function(add_unittest_framework_library name)
endif()

# The Nvidia 'nvlink' linker does not support static libraries.
if(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
set(library_type OBJECT)
else()
set(library_type STATIC)
Expand Down
49 changes: 26 additions & 23 deletions libc/test/src/__support/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
add_custom_target(libc-support-tests)

# FIXME: These tests are currently broken on the GPU.
if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU)
if(NOT LIBC_TARGET_OS_IS_GPU)
add_libc_test(
blockstore_test
SUITE
Expand Down Expand Up @@ -76,7 +76,7 @@ add_libc_test(
)

# The GPU does not support varargs currently.
if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU)
if(NOT LIBC_TARGET_OS_IS_GPU)
add_libc_test(
arg_list_test
SUITE
Expand All @@ -88,8 +88,7 @@ if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU)
)
endif()

# FIXME: Crash in NVPTX target lowering for calls
if(NOT LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
add_libc_test(
uint_test
SUITE
Expand Down Expand Up @@ -159,29 +158,33 @@ add_libc_test(
libc.src.__support.memory_size
)

add_executable(
libc_str_to_float_comparison_test
str_to_float_comparison_test.cpp
)
# FIXME: We shouldn't have regular executables created because we could be
# cross-compiling the tests and running through an emulator.
if(NOT LIBC_TARGET_OS_IS_GPU)
add_executable(
libc_str_to_float_comparison_test
str_to_float_comparison_test.cpp
)

target_link_libraries(libc_str_to_float_comparison_test
PRIVATE
"${LIBC_TARGET}"
)
target_link_libraries(libc_str_to_float_comparison_test
PRIVATE
"${LIBC_TARGET}"
)

add_executable(
libc_system_str_to_float_comparison_test
str_to_float_comparison_test.cpp
)
add_executable(
libc_system_str_to_float_comparison_test
str_to_float_comparison_test.cpp
)

set(float_test_file ${CMAKE_CURRENT_SOURCE_DIR}/str_to_float_comparison_data.txt)
set(float_test_file ${CMAKE_CURRENT_SOURCE_DIR}/str_to_float_comparison_data.txt)

add_custom_command(TARGET libc_str_to_float_comparison_test
POST_BUILD
COMMAND $<TARGET_FILE:libc_str_to_float_comparison_test> ${float_test_file}
DEPENDS ${float_test_file}
COMMENT "Test the strtof and strtod implementations against precomputed results."
VERBATIM)
add_custom_command(TARGET libc_str_to_float_comparison_test
POST_BUILD
COMMAND $<TARGET_FILE:libc_str_to_float_comparison_test> ${float_test_file}
DEPENDS ${float_test_file}
COMMENT "Test the strtof and strtod implementations against precomputed results."
VERBATIM)
endif()

add_subdirectory(CPP)
add_subdirectory(File)
Expand Down
2 changes: 1 addition & 1 deletion libc/test/src/__support/CPP/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ add_libc_test(


# This test fails with invalid address space operations on sm_60
if(NOT LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
add_libc_test(
atomic_test
SUITE
Expand Down
2 changes: 1 addition & 1 deletion libc/test/src/__support/File/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
if(NOT (TARGET libc.src.__support.threads.mutex) OR LIBC_TARGET_ARCHITECTURE_IS_GPU)
if(NOT (TARGET libc.src.__support.threads.mutex) OR LIBC_TARGET_OS_IS_GPU)
# Not all platforms have a mutex implementation. If mutex is unavailable,
# we just skip everything about files. The GPU does not currently support
# files either.
Expand Down
2 changes: 1 addition & 1 deletion libc/test/src/errno/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
if(NOT LLVM_LIBC_FULL_BUILD OR LIBC_TARGET_ARCHITECTURE_IS_GPU)
if(NOT LLVM_LIBC_FULL_BUILD OR LIBC_TARGET_OS_IS_GPU)
return()
endif()

Expand Down
20 changes: 12 additions & 8 deletions libc/test/src/math/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
add_custom_target(libc-math-unittests)

add_library(
libc_math_test_utils
RandUtils.cpp
RandUtils.h
)
# FIXME: We shouldn't have regular libraries created because we could be
# cross-compiling the tests and running through an emulator.
if(NOT LIBC_TARGET_OS_IS_GPU)
add_library(
libc_math_test_utils
RandUtils.cpp
RandUtils.h
)
endif()

add_fp_unittest(
cosf_test
Expand Down Expand Up @@ -755,7 +759,7 @@ add_fp_unittest(
)

# FIXME: These tests are currently broken for NVPTX.
if(NOT LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
add_fp_unittest(
ilogb_test
SUITE
Expand Down Expand Up @@ -986,7 +990,7 @@ add_fp_unittest(
)

# FIXME: These tests are currently broken on the GPU.
if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU)
if(NOT LIBC_TARGET_OS_IS_GPU)
add_fp_unittest(
fminf_test
SUITE
Expand Down Expand Up @@ -1231,7 +1235,7 @@ add_fp_unittest(
)

# FIXME: These tests are currently spurious for NVPTX.
if(NOT LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
add_fp_unittest(
nextafter_test
SUITE
Expand Down
8 changes: 4 additions & 4 deletions libc/test/src/math/smoke/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -819,7 +819,7 @@ add_fp_unittest(
)

# FIXME: These tests are currently broken for NVPTX.
if(NOT LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
add_fp_unittest(
ilogb_test
SUITE
Expand Down Expand Up @@ -1073,7 +1073,7 @@ add_fp_unittest(
)

# FIXME: These tests are currently broken on the GPU.
if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU)
if(NOT LIBC_TARGET_OS_IS_GPU)
add_fp_unittest(
fminf_test
SUITE
Expand Down Expand Up @@ -1417,7 +1417,7 @@ add_fp_unittest(
)

# FIXME: These tests are currently spurious for NVPTX.
if(NOT LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
add_fp_unittest(
nextafter_test
SUITE
Expand Down Expand Up @@ -1465,7 +1465,7 @@ add_fp_unittest(
)

# FIXME: These tests are currently spurious for the GPU.
if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU)
if(NOT LIBC_TARGET_OS_IS_GPU)
add_fp_unittest(
nexttoward_test
SUITE
Expand Down
2 changes: 1 addition & 1 deletion libc/test/src/stdio/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ add_libc_test(
# Create an output directory for any temporary test files.
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/testdata)

if(LIBC_TARGET_ARCHITECTURE_IS_GPU)
if(LIBC_TARGET_OS_IS_GPU)
return()
endif()

Expand Down
6 changes: 3 additions & 3 deletions libc/test/src/stdlib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ add_libc_test(
)

# This fails on NVPTX where the output value is off by one from the expected value.
if(NOT LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
add_fp_unittest(
strtod_test
SUITE
Expand Down Expand Up @@ -127,7 +127,7 @@ add_libc_test(
)

# This fails on NVPTX where the output value is off by one from the expected value.
if(NOT LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
add_libc_test(
strtold_test
SUITE
Expand Down Expand Up @@ -339,7 +339,7 @@ if(LLVM_LIBC_FULL_BUILD)
)

# Only the GPU has an in-tree 'malloc' implementation.
if(LIBC_TARGET_ARCHITECTURE_IS_GPU)
if(LIBC_TARGET_OS_IS_GPU)
add_libc_test(
malloc_test
HERMETIC_TEST_ONLY
Expand Down
2 changes: 1 addition & 1 deletion libc/test/utils/UnitTest/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
if(LIBC_TARGET_ARCHITECTURE_IS_GPU)
if(LIBC_TARGET_OS_IS_GPU)
return()
endif()

Expand Down
2 changes: 1 addition & 1 deletion libc/utils/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
if(LLVM_INCLUDE_TESTS)
add_subdirectory(MPFRWrapper)
endif()
if(LIBC_TARGET_ARCHITECTURE_IS_GPU)
if(LIBC_TARGET_OS_IS_GPU)
add_subdirectory(gpu)
endif()
2 changes: 1 addition & 1 deletion libc/utils/MPFRWrapper/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ if(LIBC_TESTS_CAN_USE_MPFR)
target_link_directories(libcMPFRWrapper PUBLIC ${LLVM_LIBC_MPFR_INSTALL_PATH}/lib)
endif()
target_link_libraries(libcMPFRWrapper PUBLIC LibcFPTestHelpers.unit LibcTest.unit mpfr gmp)
elseif(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU)
elseif(NOT LIBC_TARGET_OS_IS_GPU)
message(WARNING "Math tests using MPFR will be skipped.")
endif()
4 changes: 3 additions & 1 deletion libc/utils/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
add_subdirectory(server)
add_subdirectory(loader)
if(LIBC_TARGET_OS_IS_GPU)
add_subdirectory(loader)
endif()
48 changes: 29 additions & 19 deletions libc/utils/gpu/loader/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,31 +1,30 @@
# Common loader entry point (Main.cpp), built as an OBJECT library. The
# vendor-specific loader executables (amdgpu / nvptx) list `gpu_loader` in
# their target_link_libraries calls.
add_library(gpu_loader OBJECT Main.cpp)

# PUBLIC include paths: used to compile Main.cpp and also exported to every
# target that links against gpu_loader.
target_include_directories(gpu_loader PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}
${LIBC_SOURCE_DIR}/include
${LIBC_SOURCE_DIR}
)

# This utility needs to be compiled for the host system when cross compiling.
# Either variable being set is taken to mean a cross-compiling build, so the
# compiler and the link step are both pointed back at the host triple. The
# flags are PUBLIC, so targets linking gpu_loader inherit them as well.
if(LLVM_RUNTIMES_TARGET OR LIBC_TARGET_TRIPLE)
target_compile_options(gpu_loader PUBLIC --target=${LLVM_HOST_TRIPLE})
target_link_libraries(gpu_loader PUBLIC "--target=${LLVM_HOST_TRIPLE}")
endif()

find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
if(hsa-runtime64_FOUND)
if(hsa-runtime64_FOUND AND LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
add_subdirectory(amdgpu)
else()
elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
message(STATUS "Skipping HSA loader for gpu target, no HSA was detected")
endif()

find_package(CUDAToolkit QUIET)
# The CUDA loader requires LLVM to traverse the ELF image for symbols.
find_package(LLVM QUIET)
if(CUDAToolkit_FOUND AND LLVM_FOUND AND
"${CUDAToolkit_VERSION}" VERSION_GREATER_EQUAL "11.2")
if(CUDAToolkit_FOUND AND LLVM_FOUND AND LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
add_subdirectory(nvptx)
else()
if("${CUDAToolkit_VERSION}" VERSION_LESS "11.2")
message(WARNING
"Skipping CUDA loader for gpu target, CUDA must be version 11.2 or later.
Found CUDA Version ${CUDAToolkit_VERSION}")
else()
message(STATUS "Skipping CUDA loader for gpu target, no CUDA was detected")
endif()
elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
message(STATUS "Skipping CUDA loader for gpu target, no CUDA was detected")
endif()

# Add a custom target to be used for testing.
Expand All @@ -37,20 +36,31 @@ if(LIBC_GPU_LOADER_EXECUTABLE)
PROPERTIES
EXECUTABLE "${LIBC_GPU_LOADER_EXECUTABLE}"
)
elseif(TARGET amdhsa_loader AND LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU)
elseif(TARGET amdhsa-loader AND LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
add_custom_target(libc.utils.gpu.loader)
add_dependencies(libc.utils.gpu.loader amdhsa_loader)
add_dependencies(libc.utils.gpu.loader amdhsa-loader)
set_target_properties(
libc.utils.gpu.loader
PROPERTIES
EXECUTABLE "$<TARGET_FILE:amdhsa_loader>"
TARGET amdhsa-loader
EXECUTABLE "$<TARGET_FILE:amdhsa-loader>"
)
elseif(TARGET nvptx_loader AND LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
elseif(TARGET nvptx-loader AND LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
add_custom_target(libc.utils.gpu.loader)
add_dependencies(libc.utils.gpu.loader nvptx_loader)
add_dependencies(libc.utils.gpu.loader nvptx-loader)
set_target_properties(
libc.utils.gpu.loader
PROPERTIES
EXECUTABLE "$<TARGET_FILE:nvptx_loader>"
TARGET nvptx-loader
EXECUTABLE "$<TARGET_FILE:nvptx-loader>"
)
endif()

# Install the loader executable backing `libc.utils.gpu.loader`, if any.
# The custom "TARGET" property names the in-tree executable (amdhsa-loader or
# nvptx-loader). When the property is not set, get_target_property yields a
# NOTFOUND value, the inner check is false, and nothing is installed.
if(TARGET libc.utils.gpu.loader)
  get_target_property(loader_executable_target libc.utils.gpu.loader "TARGET")
  if(loader_executable_target)
    install(
      TARGETS ${loader_executable_target}
      DESTINATION ${CMAKE_INSTALL_BINDIR}
      COMPONENT libc
    )
  endif()
endif()
6 changes: 3 additions & 3 deletions libc/utils/gpu/loader/amdgpu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
add_executable(amdhsa_loader Loader.cpp)
add_dependencies(amdhsa_loader libc.src.__support.RPC.rpc)
add_executable(amdhsa-loader Loader.cpp)
add_dependencies(amdhsa-loader libc.src.__support.RPC.rpc)

target_link_libraries(amdhsa_loader
target_link_libraries(amdhsa-loader
PRIVATE
hsa-runtime64::hsa-runtime64
gpu_loader
Expand Down
10 changes: 5 additions & 5 deletions libc/utils/gpu/loader/nvptx/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
add_executable(nvptx_loader Loader.cpp)
add_dependencies(nvptx_loader libc.src.__support.RPC.rpc)
add_executable(nvptx-loader Loader.cpp)
add_dependencies(nvptx-loader libc.src.__support.RPC.rpc)

if(NOT LLVM_ENABLE_RTTI)
target_compile_options(nvptx_loader PRIVATE -fno-rtti)
target_compile_options(nvptx-loader PRIVATE -fno-rtti)
endif()
target_include_directories(nvptx_loader PRIVATE ${LLVM_INCLUDE_DIRS})
target_link_libraries(nvptx_loader
target_include_directories(nvptx-loader PRIVATE ${LLVM_INCLUDE_DIRS})
target_link_libraries(nvptx-loader
PRIVATE
gpu_loader
llvmlibc_rpc_server
Expand Down
9 changes: 9 additions & 0 deletions libc/utils/gpu/server/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,21 @@ target_include_directories(llvmlibc_rpc_server PRIVATE ${LIBC_SOURCE_DIR})
target_include_directories(llvmlibc_rpc_server PUBLIC ${LIBC_SOURCE_DIR}/include)
target_include_directories(llvmlibc_rpc_server PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})


# Ignore unsupported clang attributes if we're using GCC.
target_compile_options(llvmlibc_rpc_server PUBLIC
$<$<CXX_COMPILER_ID:GNU>:-Wno-attributes>)
target_compile_definitions(llvmlibc_rpc_server PUBLIC
LIBC_NAMESPACE=${LIBC_NAMESPACE})

# This utility needs to be compiled for the host system when cross compiling.
# Either variable being set is taken to mean a cross-compiling build, so the
# compile and link steps are both forced back to the host triple. The flags are
# PUBLIC, so anything linking llvmlibc_rpc_server inherits them too.
if(LLVM_RUNTIMES_TARGET OR LIBC_TARGET_TRIPLE)
target_compile_options(llvmlibc_rpc_server PUBLIC
--target=${LLVM_HOST_TRIPLE})
target_link_libraries(llvmlibc_rpc_server PUBLIC
"--target=${LLVM_HOST_TRIPLE}")
endif()

# Install the server and associated header.
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/rpc_server.h
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/gpu-none-llvm/
Expand Down
4 changes: 3 additions & 1 deletion llvm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,9 @@ else()
foreach(_name ${LLVM_RUNTIME_TARGETS})
if("libc" IN_LIST RUNTIMES_${_name}_LLVM_ENABLE_RUNTIMES)
set(NEED_LIBC_HDRGEN TRUE)
break()
if("${_name}" STREQUAL "amdgcn-amd-amdhsa" OR "${_name}" STREQUAL "nvptx64-nvidia-cuda")
set(LLVM_LIBC_GPU_BUILD ON)
endif()
endif()
endforeach()
endif()
Expand Down
7 changes: 7 additions & 0 deletions llvm/cmake/modules/HandleLLVMOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,13 @@ if( LLVM_ENABLE_ASSERTIONS )
endif()
endif()

# GPU targets opt out of all the standard flag handling that follows: stop
# processing this file as soon as the default target triple begins with
# "amdgcn" or "nvptx64".
if("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^(amdgcn|nvptx64)")
  return()
endif()

if(LLVM_ENABLE_EXPENSIVE_CHECKS)
add_compile_definitions(EXPENSIVE_CHECKS)

Expand Down
11 changes: 8 additions & 3 deletions llvm/runtimes/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ foreach(entry ${runtimes})
list(APPEND prefixes "LLVM_LIBC")
list(APPEND prefixes "LIBC_")
# The `libc` project may require '-DCUDAToolkit_ROOT' in GPU mode.
if(LIBC_GPU_BUILD OR LIBC_GPU_ARCHITECTURES)
if(LLVM_LIBC_GPU_BUILD)
list(APPEND prefixes "CUDA")
endif()
endif()
Expand Down Expand Up @@ -424,7 +424,7 @@ if(runtimes)
endforeach()
endif()
if("libc" IN_LIST LLVM_ENABLE_PROJECTS AND
(LLVM_LIBC_FULL_BUILD OR LIBC_GPU_BUILD OR LIBC_GPU_ARCHITECTURES))
(LLVM_LIBC_FULL_BUILD OR LLVM_LIBC_GPU_BUILD))
if(LIBC_HDRGEN_EXE)
set(hdrgen_exe ${LIBC_HDRGEN_EXE})
else()
Expand All @@ -441,7 +441,12 @@ if(runtimes)
set(libc_cmake_args "-DLIBC_HDRGEN_EXE=${hdrgen_exe}"
"-DLLVM_LIBC_FULL_BUILD=ON")
list(APPEND extra_deps ${hdrgen_deps})
if(LIBC_GPU_BUILD OR LIBC_GPU_ARCHITECTURES)
if(LLVM_LIBC_GPU_BUILD)
list(APPEND libc_cmake_args "-DLLVM_LIBC_GPU_BUILD=ON")
# The `libc` project may require '-DCUDAToolkit_ROOT' in GPU mode.
if(CUDAToolkit_ROOT)
list(APPEND libc_cmake_args "-DCUDAToolkit_ROOT=${CUDAToolkit_ROOT}")
endif()
foreach(dep clang-offload-packager nvptx-arch amdgpu-arch)
if(TARGET ${dep})
list(APPEND extra_deps ${dep})
Expand Down
9 changes: 1 addition & 8 deletions openmp/libomptarget/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -119,14 +119,7 @@ endif()

pythonize_bool(LIBOMPTARGET_OMPT_SUPPORT)

# Check if this build supports the GPU libc.
set(LIBC_GPU_SUPPORT FALSE)
if("libc" IN_LIST LLVM_ENABLE_RUNTIMES AND (LIBC_GPU_BUILD OR
LIBC_GPU_ARCHITECTURES))
set(LIBC_GPU_SUPPORT TRUE)
endif()

set(LIBOMPTARGET_GPU_LIBC_SUPPORT ${LIBC_GPU_SUPPORT} CACHE BOOL
set(LIBOMPTARGET_GPU_LIBC_SUPPORT ${LLVM_LIBC_GPU_BUILD} CACHE BOOL
"Libomptarget support for the GPU libc")
pythonize_bool(LIBOMPTARGET_GPU_LIBC_SUPPORT)

Expand Down
6 changes: 5 additions & 1 deletion openmp/libomptarget/plugins-nextgen/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,12 @@ elseif(${LIBOMPTARGET_GPU_LIBC_SUPPORT})
find_library(llvmlibc_rpc_server NAMES llvmlibc_rpc_server
PATHS ${LIBOMPTARGET_LLVM_LIBRARY_DIR} NO_DEFAULT_PATH)
if(llvmlibc_rpc_server)
target_link_libraries(PluginCommon PRIVATE llvmlibc_rpc_server)
target_link_libraries(PluginCommon PRIVATE ${llvmlibc_rpc_server})
target_compile_definitions(PluginCommon PRIVATE LIBOMPTARGET_RPC_SUPPORT)
# We may need to get the headers directly from the 'libc' source directory.
target_include_directories(PluginCommon PRIVATE
${CMAKE_SOURCE_DIR}/../libc/utils/gpu/server
${CMAKE_SOURCE_DIR}/../libc/include)
endif()
endif()

Expand Down
3 changes: 2 additions & 1 deletion openmp/libomptarget/plugins-nextgen/common/src/RPC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
#if __has_include(<gpu-none-llvm/rpc_server.h>)
#include <gpu-none-llvm/rpc_server.h>
#elif defined(LIBOMPTARGET_RPC_SUPPORT)
#include <rpc_server.h>
// Just pull this out of the source if available.
#include "rpc_server.h"
#endif

using namespace llvm;
Expand Down
8 changes: 6 additions & 2 deletions openmp/libomptarget/test/lit.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,12 @@ def remove_suffix_if_present(name):

def add_libraries(source):
if config.libomptarget_has_libc:
return source + " " + config.llvm_library_dir + "/libcgpu.a " + \
config.llvm_library_intdir + "/libomptarget.devicertl.a"
if config.libomptarget_current_target.startswith('nvptx'):
return source + " " + config.llvm_library_dir + "/libcgpu-nvptx.a " + \
config.llvm_library_intdir + "/libomptarget.devicertl.a"
elif config.libomptarget_current_target.startswith('amdgcn'):
return source + " " + config.llvm_library_dir + "/libcgpu-amdgpu.a " + \
config.llvm_library_intdir + "/libomptarget.devicertl.a"
return source + " " + config.llvm_library_intdir + "/libomptarget.devicertl.a"

# substitutions
Expand Down