diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index e5196bd8b5ae9..347b250260c4c 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -1087,10 +1087,41 @@ static void addOpenMPDeviceLibC(const ToolChain &TC, const ArgList &Args, "llvm-libc-decls"); bool HasLibC = llvm::sys::fs::exists(LibCDecls) && llvm::sys::fs::is_directory(LibCDecls); - if (Args.hasFlag(options::OPT_gpulibc, options::OPT_nogpulibc, HasLibC)) { - CmdArgs.push_back("-lcgpu"); - CmdArgs.push_back("-lmgpu"); + if (!Args.hasFlag(options::OPT_gpulibc, options::OPT_nogpulibc, HasLibC)) + return; + + // We don't have access to the offloading toolchains here, so determine from + // the arguments if we have any active NVPTX or AMDGPU toolchains. + llvm::DenseSet Libraries; + if (const Arg *Targets = Args.getLastArg(options::OPT_fopenmp_targets_EQ)) { + if (llvm::any_of(Targets->getValues(), + [](auto S) { return llvm::Triple(S).isAMDGPU(); })) { + Libraries.insert("-lcgpu-amdgpu"); + Libraries.insert("-lmgpu-amdgpu"); + } + if (llvm::any_of(Targets->getValues(), + [](auto S) { return llvm::Triple(S).isNVPTX(); })) { + Libraries.insert("-lcgpu-nvptx"); + Libraries.insert("-lmgpu-nvptx"); + } } + + for (StringRef Arch : Args.getAllArgValues(options::OPT_offload_arch_EQ)) { + if (llvm::any_of(llvm::split(Arch, ","), [](StringRef Str) { + return IsAMDGpuArch(StringToCudaArch(Str)); + })) { + Libraries.insert("-lcgpu-amdgpu"); + Libraries.insert("-lmgpu-amdgpu"); + } + if (llvm::any_of(llvm::split(Arch, ","), [](StringRef Str) { + return IsNVIDIAGpuArch(StringToCudaArch(Str)); + })) { + Libraries.insert("-lcgpu-nvptx"); + Libraries.insert("-lmgpu-nvptx"); + } + } + + llvm::append_range(CmdArgs, Libraries); } void tools::addOpenMPRuntimeLibraryPath(const ToolChain &TC, diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c index bccc5fd9483ac..5da74a35d87ad 100644 
--- a/clang/test/Driver/openmp-offload-gpu.c +++ b/clang/test/Driver/openmp-offload-gpu.c @@ -393,14 +393,28 @@ // // RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \ // RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc \ +// RUN: --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgpu-gfx803.bc \ // RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \ -// RUN: --offload-arch=sm_52 -gpulibc -nogpuinc %s 2>&1 \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: --offload-arch=sm_52,gfx803 -gpulibc -nogpuinc %s 2>&1 \ // RUN: | FileCheck --check-prefix=LIBC-GPU %s -// LIBC-GPU: "-lcgpu"{{.*}}"-lmgpu" +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \ +// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc \ +// RUN: --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgpu-gfx803.bc \ +// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 \ +// RUN: -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 \ +// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -gpulibc -nogpuinc %s 2>&1 \ +// RUN: | FileCheck --check-prefix=LIBC-GPU %s +// LIBC-GPU-DAG: "-lcgpu-amdgpu" +// LIBC-GPU-DAG: "-lmgpu-amdgpu" +// LIBC-GPU-DAG: "-lcgpu-nvptx" +// LIBC-GPU-DAG: "-lmgpu-nvptx" // RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \ // RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc \ // RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \ // RUN: --offload-arch=sm_52 -nogpulibc -nogpuinc %s 2>&1 \ // RUN: | FileCheck --check-prefix=NO-LIBC-GPU %s -// NO-LIBC-GPU-NOT: "-lcgpu"{{.*}}"-lmgpu" +// NO-LIBC-GPU-NOT: -lmgpu{{.*}}-lcgpu diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt index 9f9839423499e..6a57fcec26e47 100644 --- a/libc/CMakeLists.txt +++ b/libc/CMakeLists.txt @@ -43,7 +43,7 @@ 
set(LIBC_NAMESPACE "__llvm_libc_${LLVM_VERSION_MAJOR}_${LLVM_VERSION_MINOR}_${LL CACHE STRING "The namespace to use to enclose internal implementations. Must start with '__llvm_libc'." ) -if(LLVM_LIBC_FULL_BUILD OR LIBC_GPU_BUILD OR LIBC_GPU_ARCHITECTURES) +if(LLVM_LIBC_FULL_BUILD OR LLVM_LIBC_GPU_BUILD) if(NOT LIBC_HDRGEN_EXE) # We need to set up hdrgen first since other targets depend on it. add_subdirectory(utils/LibcTableGenUtil) @@ -77,7 +77,7 @@ if(LIBC_HDRGEN_ONLY OR NEED_LIBC_HDRGEN) # to build libc-hdrgen and return. # Always make the RPC server availible to other projects for GPU mode. - if(LIBC_GPU_BUILD OR LIBC_GPU_ARCHITECTURES) + if(LLVM_LIBC_GPU_BUILD) add_subdirectory(utils/gpu/server) endif() return() @@ -118,7 +118,7 @@ if(COMMAND_RETURN_CODE EQUAL 0) message(STATUS "Set COMPILER_RESOURCE_DIR to " "${COMPILER_RESOURCE_DIR} using --print-resource-dir") else() - if (LIBC_TARGET_ARCHITECTURE_IS_GPU) + if (LIBC_TARGET_OS_IS_GPU) message(FATAL_ERROR "COMPILER_RESOURCE_DIR must be set for GPU builds") else() set(COMPILER_RESOURCE_DIR OFF) @@ -216,11 +216,7 @@ foreach(config_path IN LISTS LIBC_CONFIG_JSON_FILE_LIST) load_libc_config(${config_path}/config.json ${cmd_line_conf}) endforeach() -if(LIBC_TARGET_ARCHITECTURE_IS_GPU) - set(LIBC_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/include) - set(LIBC_INSTALL_INCLUDE_DIR ${CMAKE_INSTALL_INCLUDEDIR}/gpu-none-llvm) - set(LIBC_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}) -elseif(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND LIBC_ENABLE_USE_BY_CLANG) +if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND LIBC_ENABLE_USE_BY_CLANG) set(LIBC_INCLUDE_DIR ${LLVM_BINARY_DIR}/include/${LLVM_DEFAULT_TARGET_TRIPLE}) set(LIBC_INSTALL_INCLUDE_DIR ${CMAKE_INSTALL_INCLUDEDIR}/${LLVM_DEFAULT_TARGET_TRIPLE}) set(LIBC_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}/${LLVM_DEFAULT_TARGET_TRIPLE}) @@ -235,7 +231,11 @@ else() set(LIBC_INCLUDE_DIR ${CMAKE_BINARY_DIR}/include) set(LIBC_LIBRARY_DIR ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}) endif() - 
set(LIBC_INSTALL_INCLUDE_DIR ${CMAKE_INSTALL_INCLUDEDIR}) + if(LIBC_TARGET_OS_IS_GPU) + set(LIBC_INSTALL_INCLUDE_DIR ${CMAKE_INSTALL_INCLUDEDIR}/${LLVM_DEFAULT_TARGET_TRIPLE}) + else() + set(LIBC_INSTALL_INCLUDE_DIR ${CMAKE_INSTALL_INCLUDEDIR}) + endif() endif() if(LIBC_TARGET_TRIPLE) @@ -247,7 +247,7 @@ else() set(LIBC_INSTALL_LIBRARY_DIR lib${LLVM_LIBDIR_SUFFIX}) endif() -if(LIBC_TARGET_ARCHITECTURE_IS_GPU) +if(LIBC_TARGET_OS_IS_GPU) include(prepare_libc_gpu_build) set(LIBC_ENABLE_UNITTESTS OFF) endif() diff --git a/libc/cmake/modules/LLVMLibCArchitectures.cmake b/libc/cmake/modules/LLVMLibCArchitectures.cmake index 623ed774be727..0dbc59ad643ac 100644 --- a/libc/cmake/modules/LLVMLibCArchitectures.cmake +++ b/libc/cmake/modules/LLVMLibCArchitectures.cmake @@ -6,18 +6,6 @@ # platform. # ------------------------------------------------------------------------------ -if(LIBC_GPU_BUILD OR LIBC_GPU_ARCHITECTURES) - # We set the generic target and OS to "gpu" here. More specific defintions - # for the exact target GPU are set up in prepare_libc_gpu_build.cmake. - set(LIBC_TARGET_OS "gpu") - set(LIBC_TARGET_ARCHITECTURE_IS_GPU TRUE) - set(LIBC_TARGET_ARCHITECTURE "gpu") - if(LIBC_TARGET_TRIPLE) - message(WARNING "LIBC_TARGET_TRIPLE is ignored as LIBC_GPU_BUILD is on. ") - endif() - return() -endif() - if(MSVC) # If the compiler is visual c++ or equivalent, we will assume a host build. set(LIBC_TARGET_OS ${CMAKE_HOST_SYSTEM_NAME}) @@ -59,6 +47,10 @@ function(get_arch_and_system_from_triple triple arch_var sys_var) set(target_arch "riscv32") elseif(target_arch MATCHES "^riscv64") set(target_arch "riscv64") + elseif(target_arch MATCHES "^amdgcn") + set(target_arch "amdgpu") + elseif(target_arch MATCHES "^nvptx64") + set(target_arch "nvptx") else() return() endif() @@ -75,6 +67,12 @@ function(get_arch_and_system_from_triple triple arch_var sys_var) set(target_sys "darwin") endif() + # Setting OS name for GPU architectures. 
+ list(GET triple_comps -1 gpu_target_sys) + if(gpu_target_sys MATCHES "^amdhsa" OR gpu_target_sys MATCHES "^cuda") + set(target_sys "gpu") + endif() + set(${sys_var} ${target_sys} PARENT_SCOPE) endfunction(get_arch_and_system_from_triple) @@ -156,6 +154,10 @@ elseif(LIBC_TARGET_ARCHITECTURE STREQUAL "riscv64") elseif(LIBC_TARGET_ARCHITECTURE STREQUAL "riscv32") set(LIBC_TARGET_ARCHITECTURE_IS_RISCV32 TRUE) set(LIBC_TARGET_ARCHITECTURE "riscv") +elseif(LIBC_TARGET_ARCHITECTURE STREQUAL "amdgpu") + set(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU TRUE) +elseif(LIBC_TARGET_ARCHITECTURE STREQUAL "nvptx") + set(LIBC_TARGET_ARCHITECTURE_IS_NVPTX TRUE) else() message(FATAL_ERROR "Unsupported libc target architecture ${LIBC_TARGET_ARCHITECTURE}") @@ -178,6 +180,8 @@ elseif(LIBC_TARGET_OS STREQUAL "darwin") set(LIBC_TARGET_OS_IS_DARWIN TRUE) elseif(LIBC_TARGET_OS STREQUAL "windows") set(LIBC_TARGET_OS_IS_WINDOWS TRUE) +elseif(LIBC_TARGET_OS STREQUAL "gpu") + set(LIBC_TARGET_OS_IS_GPU TRUE) else() message(FATAL_ERROR "Unsupported libc target operating system ${LIBC_TARGET_OS}") diff --git a/libc/cmake/modules/LLVMLibCCheckMPFR.cmake b/libc/cmake/modules/LLVMLibCCheckMPFR.cmake index 9e361f5fd8112..bbaeb9f0dc053 100644 --- a/libc/cmake/modules/LLVMLibCCheckMPFR.cmake +++ b/libc/cmake/modules/LLVMLibCCheckMPFR.cmake @@ -2,7 +2,7 @@ set(LLVM_LIBC_MPFR_INSTALL_PATH "" CACHE PATH "Path to where MPFR is installed ( if(LLVM_LIBC_MPFR_INSTALL_PATH) set(LIBC_TESTS_CAN_USE_MPFR TRUE) -elseif(LIBC_TARGET_ARCHITECTURE_IS_GPU) +elseif(LIBC_TARGET_OS_IS_GPU) set(LIBC_TESTS_CAN_USE_MPFR FALSE) else() try_compile( diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake index 140e4d51a9c2e..33ba5da4f8d57 100644 --- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake +++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake @@ -82,10 +82,22 @@ function(_get_common_compile_options output_var flags) list(APPEND compile_options "/EHs-c-") 
list(APPEND compile_options "/GR-") endif() - if (LIBC_TARGET_ARCHITECTURE_IS_GPU) + if (LIBC_TARGET_OS_IS_GPU) list(APPEND compile_options "-nogpulib") list(APPEND compile_options "-fvisibility=hidden") list(APPEND compile_options "-fconvergent-functions") + list(APPEND compile_options "-flto") + + if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) + list(APPEND compile_options "-Wno-unknown-cuda-version") + list(APPEND compile_options "SHELL:-mllvm -nvptx-emit-init-fini-kernel=false") + list(APPEND compile_options "--cuda-feature=+ptx63") + if(LIBC_CUDA_ROOT) + list(APPEND compile_options "--cuda-path=${LIBC_CUDA_ROOT}") + endif() + elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) + list(APPEND compile_options "SHELL:-Xclang -mcode-object-version=none") + endif() # Manually disable all standard include paths and include the resource # directory to prevent system headers from being included. @@ -138,73 +150,21 @@ function(_get_common_test_compile_options output_var flags) set(${output_var} ${compile_options} PARENT_SCOPE) endfunction() -# Obtains NVPTX specific arguments for compilation. -# The PTX feature is primarily based on the CUDA toolchain version. We want to -# be able to target NVPTX without an existing CUDA installation, so we need to -# set this manually. This simply sets the PTX feature to the minimum required -# for the features we wish to use on that target. The minimum PTX features used -# here roughly corresponds to the CUDA 9.0 release. -# Adjust as needed for desired PTX features. 
-function(get_nvptx_compile_options output_var gpu_arch) - set(nvptx_options "") - list(APPEND nvptx_options "-march=${gpu_arch}") - list(APPEND nvptx_options "-Wno-unknown-cuda-version") - list(APPEND nvptx_options "SHELL:-mllvm -nvptx-emit-init-fini-kernel=false") - if(${gpu_arch} STREQUAL "sm_35") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_37") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_50") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_52") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_53") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_60") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_61") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_62") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_70") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_72") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_75") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_80") - list(APPEND nvptx_options "--cuda-feature=+ptx72") - elseif(${gpu_arch} STREQUAL "sm_86") - list(APPEND nvptx_options "--cuda-feature=+ptx72") - elseif(${gpu_arch} STREQUAL "sm_89") - list(APPEND nvptx_options "--cuda-feature=+ptx72") - elseif(${gpu_arch} STREQUAL "sm_90") - list(APPEND nvptx_options "--cuda-feature=+ptx72") - else() - message(FATAL_ERROR "Unknown Nvidia GPU architecture '${gpu_arch}'") - endif() - - if(LIBC_CUDA_ROOT) - list(APPEND nvptx_options "--cuda-path=${LIBC_CUDA_ROOT}") - endif() - set(${output_var} ${nvptx_options} PARENT_SCOPE) -endfunction() - function(_get_hermetic_test_compile_options output_var flags) 
_get_compile_options_from_flags(compile_flags ${flags}) list(APPEND compile_options ${LIBC_COMPILE_OPTIONS_DEFAULT} ${compile_flags} ${flags} -fpie -ffreestanding -fno-exceptions -fno-rtti) # The GPU build requires overriding the default CMake triple and architecture. - if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU) + if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) list(APPEND compile_options -nogpulib -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto - --target=${LIBC_GPU_TARGET_TRIPLE} -mcode-object-version=${LIBC_GPU_CODE_OBJECT_VERSION}) - elseif(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX) - get_nvptx_compile_options(nvptx_options ${LIBC_GPU_TARGET_ARCHITECTURE}) + elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) list(APPEND compile_options - -nogpulib ${nvptx_options} -fno-use-cxa-atexit --target=${LIBC_GPU_TARGET_TRIPLE}) + "SHELL:-mllvm -nvptx-emit-init-fini-kernel=false" + --cuda-path=${LIBC_CUDA_ROOT} + -nogpulib -march=${LIBC_GPU_TARGET_ARCHITECTURE} -fno-use-cxa-atexit) endif() set(${output_var} ${compile_options} PARENT_SCOPE) endfunction() diff --git a/libc/cmake/modules/LLVMLibCHeaderRules.cmake b/libc/cmake/modules/LLVMLibCHeaderRules.cmake index 9e9b598721ab3..19515b1cbcc18 100644 --- a/libc/cmake/modules/LLVMLibCHeaderRules.cmake +++ b/libc/cmake/modules/LLVMLibCHeaderRules.cmake @@ -139,7 +139,7 @@ function(add_gen_header target_name) ${hdrgen_deps} ) - if(LIBC_TARGET_ARCHITECTURE_IS_GPU) + if(LIBC_TARGET_OS_IS_GPU) file(MAKE_DIRECTORY ${LIBC_INCLUDE_DIR}/llvm-libc-decls) set(decl_out_file ${LIBC_INCLUDE_DIR}/llvm-libc-decls/${relative_path}) add_custom_command( diff --git a/libc/cmake/modules/LLVMLibCLibraryRules.cmake b/libc/cmake/modules/LLVMLibCLibraryRules.cmake index 81c207ec23176..f15ffd5f9c218 100644 --- a/libc/cmake/modules/LLVMLibCLibraryRules.cmake +++ b/libc/cmake/modules/LLVMLibCLibraryRules.cmake @@ -50,31 +50,9 @@ function(collect_object_file_deps target result) endif() endfunction(collect_object_file_deps) -# A rule to build a library from a collection of 
entrypoint objects. -# Usage: -# add_entrypoint_library( -# DEPENDS -# ) -# -# NOTE: If one wants an entrypoint to be available in a library, then they will -# have to list the entrypoint target explicitly in the DEPENDS list. Implicit -# entrypoint dependencies will not be added to the library. -function(add_entrypoint_library target_name) - cmake_parse_arguments( - "ENTRYPOINT_LIBRARY" - "" # No optional arguments - "" # No single value arguments - "DEPENDS" # Multi-value arguments - ${ARGN} - ) - if(NOT ENTRYPOINT_LIBRARY_DEPENDS) - message(FATAL_ERROR "'add_entrypoint_library' target requires a DEPENDS list " - "of 'add_entrypoint_object' targets.") - endif() - - get_fq_deps_list(fq_deps_list ${ENTRYPOINT_LIBRARY_DEPENDS}) +function(get_all_object_file_deps result fq_deps_list) set(all_deps "") - foreach(dep IN LISTS fq_deps_list) + foreach(dep ${fq_deps_list}) get_target_property(dep_type ${dep} "TARGET_TYPE") if(NOT ((${dep_type} STREQUAL ${ENTRYPOINT_OBJ_TARGET_TYPE}) OR (${dep_type} STREQUAL ${ENTRYPOINT_EXT_TARGET_TYPE}) OR @@ -102,6 +80,121 @@ function(add_entrypoint_library target_name) list(APPEND all_deps ${entrypoint_target}) endforeach(dep) list(REMOVE_DUPLICATES all_deps) + set(${result} ${all_deps} PARENT_SCOPE) +endfunction() + +# A rule to build a library from a collection of entrypoint objects and bundle +# it into a GPU fatbinary. Usage is the same as 'add_entrypoint_library'. 
+# Usage: +# add_gpu_entrypoint_library( +# DEPENDS <list of add_entrypoint_object targets> +# ) +function(add_gpu_entrypoint_library target_name) + cmake_parse_arguments( + "ENTRYPOINT_LIBRARY" + "" # No optional arguments + "" # No single value arguments + "DEPENDS" # Multi-value arguments + ${ARGN} + ) + if(NOT ENTRYPOINT_LIBRARY_DEPENDS) + message(FATAL_ERROR "'add_gpu_entrypoint_library' target requires a DEPENDS list " + "of 'add_entrypoint_object' targets.") + endif() + + get_fq_deps_list(fq_deps_list ${ENTRYPOINT_LIBRARY_DEPENDS}) + get_all_object_file_deps(all_deps "${fq_deps_list}") + + # The GPU 'libc' needs to be exported in a format that can be linked with + # offloading languages like OpenMP or CUDA. This wraps every GPU object into a + # fat binary and adds each of them to a static library. + set(objects "") + foreach(dep IN LISTS all_deps) + set(object $<$<STREQUAL:$<TARGET_NAME_IF_EXISTS:${dep}>,${dep}>:$<TARGET_OBJECTS:${dep}>>) + string(FIND ${dep} "." last_dot_loc REVERSE) + math(EXPR name_loc "${last_dot_loc} + 1") + string(SUBSTRING ${dep} ${name_loc} -1 name) + if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) + set(prefix --image=arch=generic,triple=nvptx64-nvidia-cuda,feature=+ptx63) + elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) + set(prefix --image=arch=generic,triple=amdgcn-amd-amdhsa) + endif() + + # Use the 'clang-offload-packager' to merge these files into a binary blob. + add_custom_command( + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin" + COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/binary + COMMAND ${LIBC_CLANG_OFFLOAD_PACKAGER} + "${prefix},file=$<JOIN:${object},,file=>" -o + ${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin + DEPENDS ${dep} + COMMENT "Packaging LLVM offloading binary for '${object}'" + ) + add_custom_target(${dep}.__gpubin__ DEPENDS ${dep} + "${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin") + + # CMake does not permit setting the name on object files. In order to have + # human readable names we create an empty stub file with the entrypoint + # name. This empty file will then have the created binary blob embedded. 
+ add_custom_command( + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp" + COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/stubs + COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp + DEPENDS ${dep} ${dep}.__gpubin__ + ) + add_custom_target(${dep}.__stub__ + DEPENDS ${dep}.__gpubin__ "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp") + + add_library(${dep}.__fatbin__ + EXCLUDE_FROM_ALL OBJECT + "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp" + ) + + # This is always compiled for the LLVM host triple instead of the native GPU + # triple that is used by default in the build. + target_compile_options(${dep}.__fatbin__ BEFORE PRIVATE -nostdlib) + target_compile_options(${dep}.__fatbin__ PRIVATE + --target=${LLVM_HOST_TRIPLE} + "SHELL:-Xclang -fembed-offload-object=${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin") + add_dependencies(${dep}.__fatbin__ ${dep} ${dep}.__stub__ ${dep}.__gpubin__) + + # Collect the newly created fat binaries containing the embedded device code. + list(APPEND objects $<TARGET_OBJECTS:${dep}.__fatbin__>) + endforeach() + + add_library( + ${target_name} + STATIC + ${objects} + ) + set_target_properties(${target_name} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY ${LIBC_LIBRARY_DIR}) +endfunction(add_gpu_entrypoint_library) + +# A rule to build a library from a collection of entrypoint objects. +# Usage: +# add_entrypoint_library( +# DEPENDS +# ) +# +# NOTE: If one wants an entrypoint to be available in a library, then they will +# have to list the entrypoint target explicitly in the DEPENDS list. Implicit +# entrypoint dependencies will not be added to the library. 
+function(add_entrypoint_library target_name) + cmake_parse_arguments( + "ENTRYPOINT_LIBRARY" + "" # No optional arguments + "" # No single value arguments + "DEPENDS" # Multi-value arguments + ${ARGN} + ) + if(NOT ENTRYPOINT_LIBRARY_DEPENDS) + message(FATAL_ERROR "'add_entrypoint_library' target requires a DEPENDS list " + "of 'add_entrypoint_object' targets.") + endif() + + get_fq_deps_list(fq_deps_list ${ENTRYPOINT_LIBRARY_DEPENDS}) + get_all_object_file_deps(all_deps "${fq_deps_list}") + set(objects "") foreach(dep IN LISTS all_deps) list(APPEND objects $<$,${dep}>:$>) diff --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake index 308ba7d0d5dd7..78536f4eec55a 100644 --- a/libc/cmake/modules/LLVMLibCObjectRules.cmake +++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake @@ -1,175 +1,5 @@ set(OBJECT_LIBRARY_TARGET_TYPE "OBJECT_LIBRARY") -# Build the object target for a single GPU arch. -# Usage: -# _build_gpu_object_for_single_arch( -# -# -# SRCS -# HDRS -# DEPENDS -# COMPILE_OPTIONS -# FLAGS -# ) -function(_build_gpu_object_for_single_arch fq_target_name gpu_arch) - cmake_parse_arguments( - "ADD_GPU_OBJ" - "" # No optional arguments - "NAME;CXX_STANDARD" # Single value arguments - "SRCS;HDRS;DEPENDS;COMPILE_OPTIONS;FLAGS" # Multi value arguments - ${ARGN} - ) - - if(NOT ADD_GPU_OBJ_CXX_STANDARD) - set(ADD_GPU_OBJ_CXX_STANDARD ${CMAKE_CXX_STANDARD}) - endif() - - set(compile_options ${ADD_GPU_OBJ_COMPILE_OPTIONS}) - # Derive the triple from the specified architecture. 
- if("${gpu_arch}" IN_LIST all_amdgpu_architectures) - set(gpu_target_triple ${AMDGPU_TARGET_TRIPLE}) - list(APPEND compile_options "-mcpu=${gpu_arch}") - list(APPEND compile_options "SHELL:-Xclang -mcode-object-version=none") - list(APPEND compile_options "-emit-llvm") - elseif("${gpu_arch}" IN_LIST all_nvptx_architectures) - set(gpu_target_triple ${NVPTX_TARGET_TRIPLE}) - get_nvptx_compile_options(nvptx_options ${gpu_arch}) - list(APPEND compile_options "${nvptx_options}") - else() - message(FATAL_ERROR "Unknown GPU architecture '${gpu_arch}'") - endif() - list(APPEND compile_options "--target=${gpu_target_triple}") - - # Build the library for this target architecture. We always emit LLVM-IR for - # packaged GPU binaries. - add_library(${fq_target_name} - EXCLUDE_FROM_ALL - OBJECT - ${ADD_GPU_OBJ_SRCS} - ${ADD_GPU_OBJ_HDRS} - ) - - target_compile_options(${fq_target_name} PRIVATE ${compile_options}) - target_include_directories(${fq_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) - target_include_directories(${fq_target_name} PRIVATE ${LIBC_SOURCE_DIR}) - set_target_properties(${fq_target_name} PROPERTIES CXX_STANDARD ${ADD_GPU_OBJ_CXX_STANDARD}) - if(ADD_GPU_OBJ_DEPENDS) - add_dependencies(${fq_target_name} ${ADD_GPU_OBJ_DEPENDS}) - set_target_properties(${fq_target_name} PROPERTIES DEPS "${ADD_GPU_OBJ_DEPENDS}") - endif() -endfunction(_build_gpu_object_for_single_arch) - -# Build the object target for the GPU. -# This compiles the target for all supported architectures and embeds it into -# host binary for installing. 
-# Usage: -# _build_gpu_object_bundle( -# -# SRCS -# HDRS -# DEPENDS -# COMPILE_OPTIONS -# FLAGS -# ) -function(_build_gpu_object_bundle fq_target_name) - cmake_parse_arguments( - "ADD_GPU_OBJ" - "" # No optional arguments - "NAME;CXX_STANDARD" # Single value arguments - "SRCS;HDRS;DEPENDS;COMPILE_OPTIONS;FLAGS" # Multi value arguments - ${ARGN} - ) - - if(NOT ADD_GPU_OBJ_CXX_STANDARD) - set(ADD_GPU_OBJ_CXX_STANDARD ${CMAKE_CXX_STANDARD}) - endif() - - foreach(add_gpu_obj_src ${ADD_GPU_OBJ_SRCS}) - # The packaged version will be built for every target GPU architecture. We do - # this so we can support multiple accelerators on the same machine. - foreach(gpu_arch ${LIBC_GPU_ARCHITECTURES}) - get_filename_component(src_name ${add_gpu_obj_src} NAME) - set(gpu_target_name ${fq_target_name}.${src_name}.${gpu_arch}) - - _build_gpu_object_for_single_arch( - ${gpu_target_name} - ${gpu_arch} - CXX_STANDARD ${ADD_GPU_OBJ_CXX_STANDARD} - HDRS ${ADD_GPU_OBJ_HDRS} - SRCS ${add_gpu_obj_src} - COMPILE_OPTIONS - ${ADD_GPU_OBJ_COMPILE_OPTIONS} - "-emit-llvm" - DEPENDS ${ADD_GPU_OBJ_DEPENDS} - ) - # Append this target to a list of images to package into a single binary. - set(input_file $) - if("${gpu_arch}" IN_LIST all_nvptx_architectures) - get_nvptx_compile_options(nvptx_options ${gpu_arch}) - string(REGEX MATCH "\\+ptx[0-9]+" nvptx_ptx_feature ${nvptx_options}) - list(APPEND packager_images - --image=file=${input_file},arch=${gpu_arch},triple=${NVPTX_TARGET_TRIPLE},feature=${nvptx_ptx_feature}) - else() - list(APPEND packager_images - --image=file=${input_file},arch=${gpu_arch},triple=${AMDGPU_TARGET_TRIPLE}) - endif() - list(APPEND gpu_target_objects ${input_file}) - endforeach() - - # After building the target for the desired GPUs we must package the output - # into a fatbinary, see https://clang.llvm.org/docs/OffloadingDesign.html for - # more information. 
- set(packaged_target_name ${fq_target_name}.${src_name}.__gpu__) - set(packaged_output_name ${CMAKE_CURRENT_BINARY_DIR}/${fq_target_name}.${src_name}.gpubin) - - add_custom_command(OUTPUT ${packaged_output_name} - COMMAND ${LIBC_CLANG_OFFLOAD_PACKAGER} - ${packager_images} -o ${packaged_output_name} - DEPENDS ${gpu_target_objects} ${add_gpu_obj_src} ${ADD_GPU_OBJ_HDRS} - COMMENT "Packaging LLVM offloading binary") - add_custom_target(${packaged_target_name} DEPENDS ${packaged_output_name}) - list(APPEND packaged_gpu_names ${packaged_target_name}) - list(APPEND packaged_gpu_binaries ${packaged_output_name}) - endforeach() - - # We create an empty 'stub' file for the host to contain the embedded device - # code. This will be packaged into 'libcgpu.a'. - # TODO: In the future we will want to combine every architecture for a target - # into a single bitcode file and use that. For now we simply build for - # every single one and let the offloading linker handle it. - string(FIND ${fq_target_name} "." last_dot_loc REVERSE) - math(EXPR name_loc "${last_dot_loc} + 1") - string(SUBSTRING ${fq_target_name} ${name_loc} -1 target_name) - set(stub_filename "${target_name}.cpp") - add_custom_command( - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/stubs/${stub_filename}" - COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/stubs/ - COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/stubs/${stub_filename} - DEPENDS ${gpu_target_objects} ${ADD_GPU_OBJ_SRCS} ${ADD_GPU_OBJ_HDRS} - ) - set(stub_target_name ${fq_target_name}.__stub__) - add_custom_target(${stub_target_name} DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/stubs/${stub_filename}) - - add_library( - ${fq_target_name} - # We want an object library as the objects will eventually get packaged into - # an archive (like libcgpu.a). 
- EXCLUDE_FROM_ALL - OBJECT - ${CMAKE_CURRENT_BINARY_DIR}/stubs/${stub_filename} - ) - target_compile_options(${fq_target_name} BEFORE PRIVATE - ${ADD_GPU_OBJ_COMPILE_OPTIONS} -nostdlib) - foreach(packaged_gpu_binary ${packaged_gpu_binaries}) - target_compile_options(${fq_target_name} PRIVATE - "SHELL:-Xclang -fembed-offload-object=${packaged_gpu_binary}") - endforeach() - target_include_directories(${fq_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) - target_include_directories(${fq_target_name} PRIVATE ${LIBC_SOURCE_DIR}) - add_dependencies(${fq_target_name} - ${full_deps_list} ${packaged_gpu_names} ${stub_target_name}) -endfunction() - # Rule which is essentially a wrapper over add_library to compile a set of # sources to object files. # Usage: @@ -214,53 +44,37 @@ function(create_object_library fq_target_name) message(FATAL_ERROR "'add_object_library' rule requires SRCS to be specified.") endif() - # The GPU build uses a separate internal file. - if(LIBC_TARGET_ARCHITECTURE_IS_GPU AND NOT ${ADD_OBJECT_NO_GPU_BUNDLE}) - set(internal_target_name ${fq_target_name}.__internal__) - set(public_packaging_for_internal "") - else() - set(internal_target_name ${fq_target_name}) - set(public_packaging_for_internal "-DLIBC_COPT_PUBLIC_PACKAGING") - endif() + set(internal_target_name ${fq_target_name}.__internal__) + set(public_packaging_for_internal "-DLIBC_COPT_PUBLIC_PACKAGING") _get_common_compile_options(compile_options "${ADD_OBJECT_FLAGS}") list(APPEND compile_options ${ADD_OBJECT_COMPILE_OPTIONS}) - # GPU builds require special handling for the objects because we want to - # export several different targets at once, e.g. for both Nvidia and AMD. 
- if(LIBC_TARGET_ARCHITECTURE_IS_GPU) - if(NOT ${ADD_OBJECT_NO_GPU_BUNDLE}) - _build_gpu_object_bundle( - ${fq_target_name} - SRCS ${ADD_OBJECT_SRCS} - HDRS ${ADD_OBJECT_HDRS} - CXX_STANDARD ${ADD_OBJECT_CXX_STANDARD} - COMPILE_OPTIONS ${compile_options} "-DLIBC_COPT_PUBLIC_PACKAGING" - DEPENDS ${fq_deps_list} - ) - endif() - # When the target for GPU is not bundled, internal_target_name is the same - # as fq_targetname - _build_gpu_object_for_single_arch( - ${internal_target_name} - ${LIBC_GPU_TARGET_ARCHITECTURE} - SRCS ${ADD_OBJECT_SRCS} - HDRS ${ADD_OBJECT_HDRS} - CXX_STANDARD ${ADD_OBJECT_CXX_STANDARD} - COMPILE_OPTIONS ${compile_options} ${public_packaging_for_internal} - DEPENDS ${fq_deps_list} - ) - else() + add_library( + ${fq_target_name} + EXCLUDE_FROM_ALL + OBJECT + ${ADD_OBJECT_SRCS} + ${ADD_OBJECT_HDRS} + ) + target_include_directories(${fq_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) + target_include_directories(${fq_target_name} PRIVATE ${LIBC_SOURCE_DIR}) + target_compile_options(${fq_target_name} PRIVATE ${compile_options}) + + # The NVPTX target is installed as LLVM-IR but the internal testing toolchain + # cannot handle it natively. Make a separate internal target for testing. 
+ if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX AND NOT LIBC_GPU_TESTS_DISABLED) add_library( - ${fq_target_name} + ${internal_target_name} EXCLUDE_FROM_ALL OBJECT ${ADD_OBJECT_SRCS} ${ADD_OBJECT_HDRS} ) - target_include_directories(${fq_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) - target_include_directories(${fq_target_name} PRIVATE ${LIBC_SOURCE_DIR}) - target_compile_options(${fq_target_name} PRIVATE ${compile_options}) + target_include_directories(${internal_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) + target_include_directories(${internal_target_name} PRIVATE ${LIBC_SOURCE_DIR}) + target_compile_options(${internal_target_name} PRIVATE ${compile_options} + -fno-lto -march=${LIBC_GPU_TARGET_ARCHITECTURE}) endif() if(SHOW_INTERMEDIATE_OBJECTS) @@ -290,13 +104,18 @@ function(create_object_library fq_target_name) FLAGS "${ADD_OBJECT_FLAGS}" ) + # If we built a separate internal target we want to use those target objects + # for testing instead of the exported target. + set(target_objects ${fq_target_name}) if(TARGET ${internal_target_name}) - set_target_properties( - ${fq_target_name} - PROPERTIES - OBJECT_FILES "$" - ) + set(target_objects ${internal_target_name}) endif() + + set_target_properties( + ${fq_target_name} + PROPERTIES + OBJECT_FILES "$" + ) endfunction(create_object_library) function(add_object_library target_name) @@ -389,12 +208,19 @@ function(create_entrypoint_object fq_target_name) get_target_property(object_file ${fq_dep_name} "OBJECT_FILE") get_target_property(object_file_raw ${fq_dep_name} "OBJECT_FILE_RAW") - add_library( - ${internal_target_name} - EXCLUDE_FROM_ALL - OBJECT - ${object_file_raw} - ) + + # If the system cannot build the GPU tests we simply make a dummy target. 
+ if(LIBC_TARGET_OS_IS_GPU AND LIBC_GPU_TESTS_DISABLED) + add_custom_target(${internal_target_name}) + else() + add_library( + ${internal_target_name} + EXCLUDE_FROM_ALL + OBJECT + ${object_file_raw} + ) + endif() + add_dependencies(${internal_target_name} ${fq_dep_name}) add_library( ${fq_target_name} @@ -441,60 +267,42 @@ function(create_entrypoint_object fq_target_name) endif() endif() - # GPU builds require special handling for the objects because we want to - # export several different targets at once, e.g. for both Nvidia and AMD. - if(LIBC_TARGET_ARCHITECTURE_IS_GPU) - _build_gpu_object_bundle( - ${fq_target_name} - SRCS ${ADD_ENTRYPOINT_OBJ_SRCS} - HDRS ${ADD_ENTRYPOINT_OBJ_HDRS} - COMPILE_OPTIONS ${common_compile_options} "-DLIBC_COPT_PUBLIC_PACKAGING" - CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD} - DEPENDS ${full_deps_list} - FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}" - ) - _build_gpu_object_for_single_arch( - ${internal_target_name} - ${LIBC_GPU_TARGET_ARCHITECTURE} - SRCS ${ADD_ENTRYPOINT_OBJ_SRCS} - HDRS ${ADD_ENTRYPOINT_OBJ_HDRS} - COMPILE_OPTIONS ${common_compile_options} - CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD} - DEPENDS ${full_deps_list} - FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}" - ) - else() - add_library( - ${internal_target_name} - # TODO: We don't need an object library for internal consumption. - # A future change should switch this to a normal static library. 
- EXCLUDE_FROM_ALL - OBJECT - ${ADD_ENTRYPOINT_OBJ_SRCS} - ${ADD_ENTRYPOINT_OBJ_HDRS} - ) - target_compile_options(${internal_target_name} BEFORE PRIVATE ${common_compile_options}) - target_include_directories(${internal_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) - target_include_directories(${internal_target_name} PRIVATE ${LIBC_SOURCE_DIR}) - add_dependencies(${internal_target_name} ${full_deps_list}) - target_link_libraries(${internal_target_name} ${full_deps_list}) - - add_library( - ${fq_target_name} - # We want an object library as the objects will eventually get packaged into - # an archive (like libc.a). - EXCLUDE_FROM_ALL - OBJECT - ${ADD_ENTRYPOINT_OBJ_SRCS} - ${ADD_ENTRYPOINT_OBJ_HDRS} - ) - target_compile_options(${fq_target_name} BEFORE PRIVATE ${common_compile_options} -DLIBC_COPT_PUBLIC_PACKAGING) - target_include_directories(${fq_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) - target_include_directories(${fq_target_name} PRIVATE ${LIBC_SOURCE_DIR}) - add_dependencies(${fq_target_name} ${full_deps_list}) - target_link_libraries(${fq_target_name} ${full_deps_list}) + add_library( + ${internal_target_name} + # TODO: We don't need an object library for internal consumption. + # A future change should switch this to a normal static library. + EXCLUDE_FROM_ALL + OBJECT + ${ADD_ENTRYPOINT_OBJ_SRCS} + ${ADD_ENTRYPOINT_OBJ_HDRS} + ) + target_compile_options(${internal_target_name} BEFORE PRIVATE ${common_compile_options}) + target_include_directories(${internal_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) + target_include_directories(${internal_target_name} PRIVATE ${LIBC_SOURCE_DIR}) + add_dependencies(${internal_target_name} ${full_deps_list}) + target_link_libraries(${internal_target_name} ${full_deps_list}) + + # The NVPTX target cannot use LTO for the internal targets used for testing. 
+ if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) + target_compile_options(${internal_target_name} PRIVATE + -fno-lto -march=${LIBC_GPU_TARGET_ARCHITECTURE}) endif() + add_library( + ${fq_target_name} + # We want an object library as the objects will eventually get packaged into + # an archive (like libc.a). + EXCLUDE_FROM_ALL + OBJECT + ${ADD_ENTRYPOINT_OBJ_SRCS} + ${ADD_ENTRYPOINT_OBJ_HDRS} + ) + target_compile_options(${fq_target_name} BEFORE PRIVATE ${common_compile_options} -DLIBC_COPT_PUBLIC_PACKAGING) + target_include_directories(${fq_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) + target_include_directories(${fq_target_name} PRIVATE ${LIBC_SOURCE_DIR}) + add_dependencies(${fq_target_name} ${full_deps_list}) + target_link_libraries(${fq_target_name} ${full_deps_list}) + set_target_properties( ${fq_target_name} PROPERTIES diff --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake index 6ca9516ff7a0e..373cbd6853859 100644 --- a/libc/cmake/modules/LLVMLibCTestRules.cmake +++ b/libc/cmake/modules/LLVMLibCTestRules.cmake @@ -449,7 +449,7 @@ function(add_integration_test test_name) ${fq_build_target_name} EXCLUDE_FROM_ALL # The NVIDIA 'nvlink' linker does not currently support static libraries. 
- $<$:${link_object_files}> + $<$:${link_object_files}> ${INTEGRATION_TEST_SRCS} ${INTEGRATION_TEST_HDRS} ) @@ -461,8 +461,17 @@ function(add_integration_test test_name) _get_hermetic_test_compile_options(compile_options "${INTEGRATION_TEST_COMPILE_OPTIONS}") target_compile_options(${fq_build_target_name} PRIVATE ${compile_options}) - if(LIBC_TARGET_ARCHITECTURE_IS_GPU) - target_link_options(${fq_build_target_name} PRIVATE -nostdlib -static) + if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) + target_link_options(${fq_build_target_name} PRIVATE + -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto + "-Wl,-mllvm,-amdgpu-lower-global-ctor-dtor=0" -nostdlib -static + "-Wl,-mllvm,-amdhsa-code-object-version=${LIBC_GPU_CODE_OBJECT_VERSION}") + elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) + # We need to use the internal object versions for NVPTX. + set(internal_suffix ".__internal__") + target_link_options(${fq_build_target_name} PRIVATE + -march=${LIBC_GPU_TARGET_ARCHITECTURE} -nostdlib -static + "--cuda-path=${LIBC_CUDA_ROOT}") elseif(LIBC_CC_SUPPORTS_NOSTDLIBPP) target_link_options(${fq_build_target_name} PRIVATE -nolibc -nostartfiles -nostdlib++ -static) else() @@ -474,9 +483,10 @@ function(add_integration_test test_name) target_link_libraries( ${fq_build_target_name} # The NVIDIA 'nvlink' linker does not currently support static libraries. - $<$>:${fq_target_name}.__libc__> - libc.startup.${LIBC_TARGET_OS}.crt1 - libc.test.IntegrationTest.test) + $<$>:${fq_target_name}.__libc__> + libc.startup.${LIBC_TARGET_OS}.crt1${internal_suffix} + libc.test.IntegrationTest.test${internal_suffix} + ) add_dependencies(${fq_build_target_name} libc.test.IntegrationTest.test ${INTEGRATION_TEST_DEPENDS}) @@ -495,7 +505,7 @@ function(add_integration_test test_name) # makes `add_custom_target` construct the correct command and execute it. 
set(test_cmd ${INTEGRATION_TEST_ENV} - $<$:${gpu_loader_exe}> + $<$:${gpu_loader_exe}> ${CMAKE_CROSSCOMPILING_EMULATOR} ${INTEGRATION_TEST_LOADER_ARGS} $ ${INTEGRATION_TEST_ARGS}) @@ -606,7 +616,7 @@ function(add_libc_hermetic_test test_name) ${fq_build_target_name} EXCLUDE_FROM_ALL # The NVIDIA 'nvlink' linker does not currently support static libraries. - $<$:${link_object_files}> + $<$:${link_object_files}> ${HERMETIC_TEST_SRCS} ${HERMETIC_TEST_HDRS} ) @@ -615,6 +625,8 @@ function(add_libc_hermetic_test test_name) RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} #OUTPUT_NAME ${fq_target_name} ) + + _get_hermetic_test_compile_options(compile_options "${HERMETIC_TEST_COMPILE_OPTIONS}") target_include_directories(${fq_build_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) target_include_directories(${fq_build_target_name} PRIVATE ${LIBC_SOURCE_DIR}) _get_hermetic_test_compile_options(compile_options "${HERMETIC_TEST_COMPILE_OPTIONS}") @@ -629,8 +641,17 @@ function(add_libc_hermetic_test test_name) endif() endforeach() - if(LIBC_TARGET_ARCHITECTURE_IS_GPU) - target_link_options(${fq_build_target_name} PRIVATE -nostdlib -static) + if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) + target_link_options(${fq_build_target_name} PRIVATE + -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto + "-Wl,-mllvm,-amdgpu-lower-global-ctor-dtor=0" -nostdlib -static + "-Wl,-mllvm,-amdhsa-code-object-version=${LIBC_GPU_CODE_OBJECT_VERSION}") + elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) + # We need to use the internal object versions for NVPTX. 
+ set(internal_suffix ".__internal__") + target_link_options(${fq_build_target_name} PRIVATE + -march=${LIBC_GPU_TARGET_ARCHITECTURE} -nostdlib -static + "--cuda-path=${LIBC_CUDA_ROOT}") elseif(LIBC_CC_SUPPORTS_NOSTDLIBPP) target_link_options(${fq_build_target_name} PRIVATE -nolibc -nostartfiles -nostdlib++ -static) else() @@ -642,12 +663,12 @@ function(add_libc_hermetic_test test_name) target_link_libraries( ${fq_build_target_name} PRIVATE - libc.startup.${LIBC_TARGET_OS}.crt1 + libc.startup.${LIBC_TARGET_OS}.crt1${internal_suffix} ${link_libraries} LibcTest.hermetic LibcHermeticTestSupport.hermetic # The NVIDIA 'nvlink' linker does not currently support static libraries. - $<$>:${fq_target_name}.__libc__>) + $<$>:${fq_target_name}.__libc__>) add_dependencies(${fq_build_target_name} LibcTest.hermetic libc.test.UnitTest.ErrnoSetterMatcher @@ -660,7 +681,7 @@ function(add_libc_hermetic_test test_name) endif() set(test_cmd ${HERMETIC_TEST_ENV} - $<$:${gpu_loader_exe}> ${CMAKE_CROSSCOMPILING_EMULATOR} ${HERMETIC_TEST_LOADER_ARGS} + $<$:${gpu_loader_exe}> ${CMAKE_CROSSCOMPILING_EMULATOR} ${HERMETIC_TEST_LOADER_ARGS} $ ${HERMETIC_TEST_ARGS}) add_custom_target( ${fq_target_name} diff --git a/libc/cmake/modules/prepare_libc_gpu_build.cmake b/libc/cmake/modules/prepare_libc_gpu_build.cmake index 2086175bae6c7..75beef86760c8 100644 --- a/libc/cmake/modules/prepare_libc_gpu_build.cmake +++ b/libc/cmake/modules/prepare_libc_gpu_build.cmake @@ -1,23 +1,8 @@ -if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU) +if(NOT LIBC_TARGET_OS_IS_GPU) message(FATAL_ERROR "libc build: Invalid attempt to set up GPU architectures.") endif() -# Set up the target architectures to build the GPU libc for. 
-set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906" - "gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942" - "gfx1010;gfx1030;gfx1031;gfx1032;gfx1033;gfx1034" - "gfx1035;gfx1036" - "gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151") -set(all_nvptx_architectures "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62" - "sm_70;sm_72;sm_75;sm_80;sm_86;sm_89;sm_90") -set(all_gpu_architectures - "${all_amdgpu_architectures};${all_nvptx_architectures}") -set(LIBC_GPU_ARCHITECTURES "all" CACHE STRING - "List of GPU architectures to build the libc for.") -set(AMDGPU_TARGET_TRIPLE "amdgcn-amd-amdhsa") -set(NVPTX_TARGET_TRIPLE "nvptx64-nvidia-cuda") - # Ensure the compiler is a valid clang when building the GPU target. set(req_ver "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}") if(NOT (CMAKE_CXX_COMPILER_ID MATCHES "[Cc]lang" AND @@ -31,40 +16,6 @@ if(NOT LLVM_LIBC_FULL_BUILD) "GPU.") endif() -# Identify any locally installed AMD GPUs on the system using 'amdgpu-arch'. -find_program(LIBC_AMDGPU_ARCH - NAMES amdgpu-arch NO_DEFAULT_PATH - PATHS ${LLVM_BINARY_DIR}/bin /opt/rocm/llvm/bin/) - -# Identify any locally installed NVIDIA GPUs on the system using 'nvptx-arch'. -find_program(LIBC_NVPTX_ARCH - NAMES nvptx-arch NO_DEFAULT_PATH - PATHS ${LLVM_BINARY_DIR}/bin) - -# Get the list of all natively supported GPU architectures. 
-set(detected_gpu_architectures "") -foreach(arch_tool ${LIBC_NVPTX_ARCH} ${LIBC_AMDGPU_ARCH}) - if(arch_tool) - execute_process(COMMAND ${arch_tool} - OUTPUT_VARIABLE arch_tool_output - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - string(REPLACE "\n" ";" arch_list "${arch_tool_output}") - list(APPEND detected_gpu_architectures "${arch_list}") - endif() -endforeach() -list(REMOVE_DUPLICATES detected_gpu_architectures) - -if(LIBC_GPU_ARCHITECTURES STREQUAL "all") - set(LIBC_GPU_ARCHITECTURES ${all_gpu_architectures}) -elseif(LIBC_GPU_ARCHITECTURES STREQUAL "native") - if(NOT detected_gpu_architectures) - message(FATAL_ERROR "No GPUs found on the system when using 'native'") - endif() - set(LIBC_GPU_ARCHITECTURES ${detected_gpu_architectures}) -endif() -message(STATUS "Building libc for the following GPU architecture(s): " - "${LIBC_GPU_ARCHITECTURES}") - # Identify the program used to package multiple images into a single binary. find_program(LIBC_CLANG_OFFLOAD_PACKAGER NAMES clang-offload-packager NO_DEFAULT_PATH @@ -87,49 +38,54 @@ else() endif() set(LIBC_GPU_TEST_ARCHITECTURE "" CACHE STRING "Architecture for the GPU tests") +if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) + check_cxx_compiler_flag("-nogpulib -mcpu=native" PLATFORM_HAS_GPU) +elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) + # Identify any locally installed NVIDIA GPUs on the system using 'nvptx-arch'. + # Using 'check_cxx_compiler_flag' does not work currently due to the link job. 
+ find_program(LIBC_NVPTX_ARCH + NAMES nvptx-arch NO_DEFAULT_PATH + PATHS ${LLVM_BINARY_DIR}/bin) + if(LIBC_NVPTX_ARCH) + execute_process(COMMAND ${LIBC_NVPTX_ARCH} + OUTPUT_VARIABLE arch_tool_output + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + if(arch_tool_output MATCHES "^sm_[0-9]+") + set(PLATFORM_HAS_GPU TRUE) + endif() + endif() +endif() set(gpu_test_architecture "") if(LIBC_GPU_TEST_ARCHITECTURE) + set(LIBC_GPU_TESTS_DISABLED FALSE) set(gpu_test_architecture ${LIBC_GPU_TEST_ARCHITECTURE}) message(STATUS "Using user-specified GPU architecture for testing: " "'${gpu_test_architecture}'") -elseif(detected_gpu_architectures) - list(GET detected_gpu_architectures 0 gpu_test_architecture) +elseif(PLATFORM_HAS_GPU) + set(LIBC_GPU_TESTS_DISABLED FALSE) + set(gpu_test_architecture "native") message(STATUS "Using GPU architecture detected on the system for testing: " - "'${gpu_test_architecture}'") + "'native'") else() - list(LENGTH LIBC_GPU_ARCHITECTURES n_gpu_archs) - if (${n_gpu_archs} EQUAL 1) - set(gpu_test_architecture ${LIBC_GPU_ARCHITECTURES}) - message(STATUS "Using user-specified GPU architecture for testing: " - "'${gpu_test_architecture}'") - else() - message(STATUS "No GPU architecture set for testing. GPU tests will not be " - "availibe. 
Set 'LIBC_GPU_TEST_ARCHITECTURE' to override.") - return() - endif() + set(LIBC_GPU_TESTS_DISABLED TRUE) + message(STATUS "No GPU architecture detected or provided, tests will not be " + "built") endif() +set(LIBC_GPU_TARGET_ARCHITECTURE "${gpu_test_architecture}") -if("${gpu_test_architecture}" IN_LIST all_amdgpu_architectures) - set(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU TRUE) - set(LIBC_GPU_TARGET_TRIPLE ${AMDGPU_TARGET_TRIPLE}) - set(LIBC_GPU_TARGET_ARCHITECTURE "${gpu_test_architecture}") -elseif("${gpu_test_architecture}" IN_LIST all_nvptx_architectures) - set(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX TRUE) - set(LIBC_GPU_TARGET_TRIPLE ${NVPTX_TARGET_TRIPLE}) - set(LIBC_GPU_TARGET_ARCHITECTURE "${gpu_test_architecture}") -else() - message(FATAL_ERROR "Unknown GPU architecture '${gpu_test_architecture}'") -endif() +if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) + # FIXME: This is a hack required to keep the CUDA package from trying to find + # pthreads. We only link the CUDA driver, so this is unneeded. + add_library(CUDA::cudart_static_deps IMPORTED INTERFACE) -if(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX) find_package(CUDAToolkit QUIET) if(CUDAToolkit_FOUND) get_filename_component(LIBC_CUDA_ROOT "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE) endif() endif() -if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU) +if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) # The AMDGPU environment uses different code objects to encode the ABI for # kernel calls and intrinsic functions. We want to specify this manually to # conform to whatever the test suite was built to handle. diff --git a/libc/docs/gpu/using.rst b/libc/docs/gpu/using.rst index 71f5e7ba20393..79b9116c38ed2 100644 --- a/libc/docs/gpu/using.rst +++ b/libc/docs/gpu/using.rst @@ -14,25 +14,25 @@ Building the GPU library LLVM's libc GPU support *must* be built with an up-to-date ``clang`` compiler due to heavy reliance on ``clang``'s GPU support. This can be done automatically -using the ``LLVM_ENABLE_RUNTIMES=libc`` option. 
To enable libc for the GPU, -enable the ``LIBC_GPU_BUILD`` option. By default, ``libcgpu.a`` will be built -using every supported GPU architecture. To restrict the number of architectures -build, either set ``LIBC_GPU_ARCHITECTURES`` to the list of desired -architectures manually or use ``native`` to detect the GPUs on your system. A -typical ``cmake`` configuration will look like this: +using the LLVM runtimes support. The GPU build is done using cross-compilation +to the GPU architecture. This project currently supports AMD and NVIDIA GPUs +which can be targeted using the appropriate target name. The following +invocation will enable a cross-compiling build for the GPU architecture and +enable the ``libc`` project only for them. .. code-block:: sh $> cd llvm-project # The llvm-project checkout $> mkdir build $> cd build - $> cmake ../llvm -G Ninja \ - -DLLVM_ENABLE_PROJECTS="clang;lld;compiler-rt" \ - -DLLVM_ENABLE_RUNTIMES="libc;openmp" \ + $> cmake ../llvm -G Ninja \ + -DLLVM_ENABLE_PROJECTS="clang;lld;compiler-rt" \ + -DLLVM_ENABLE_RUNTIMES="openmp" \ -DCMAKE_BUILD_TYPE= \ # Select build type - -DLIBC_GPU_BUILD=ON \ # Build in GPU mode - -DLIBC_GPU_ARCHITECTURES=all \ # Build all supported architectures - -DCMAKE_INSTALL_PREFIX= \ # Where 'libcgpu.a' will live + -DCMAKE_INSTALL_PREFIX= \ # Where 'libcgpu.a' will live + -DRUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES=libc \ + -DRUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES=libc \ + -DLLVM_RUNTIME_TARGETS="default;amdgcn-amd-amdhsa;nvptx64-nvidia-cuda" $> ninja install Since we want to include ``clang``, ``lld`` and ``compiler-rt`` in our @@ -40,13 +40,14 @@ toolchain, we list them in ``LLVM_ENABLE_PROJECTS``. To ensure ``libc`` is built using a compatible compiler and to support ``openmp`` offloading, we list them in ``LLVM_ENABLE_RUNTIMES`` to build them after the enabled projects using the newly built compiler.
``CMAKE_INSTALL_PREFIX`` specifies the installation -directory in which to install the ``libcgpu.a`` library and headers along with -LLVM. The generated headers will be placed in ``include/gpu-none-llvm``. +directory in which to install the ``libcgpu-nvptx.a`` and ``libcgpu-amdgpu.a`` +libraries and headers along with LLVM. The generated headers will be placed in +``include/``. Usage ===== -Once the ``libcgpu.a`` static archive has been built it can be linked directly +Once the static archive has been built it can be linked directly with offloading applications as a standard library. This process is described in the `clang documentation `_. This linking mode is used by the OpenMP toolchain, but is currently opt-in for @@ -68,7 +69,7 @@ supported target device. The supported architectures can be seen using LLVM's OFFLOADING IMAGE [0]: kind llvm ir - arch gfx90a + arch generic triple amdgcn-amd-amdhsa producer none diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index dc3c9b8e6328a..9090b3bca01e0 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -4,7 +4,7 @@ set(LIBC_INCLUDE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) include(LLVMLibCHeaderRules) # The GPU build wants to install files in the compiler's resource directory. -if(LIBC_TARGET_ARCHITECTURE_IS_GPU) +if(LIBC_TARGET_OS_IS_GPU) include(GetClangResourceDir) endif() @@ -586,7 +586,7 @@ add_gen_header( .llvm-libc-types.wchar_t ) -if(LIBC_TARGET_ARCHITECTURE_IS_GPU) +if(LIBC_TARGET_OS_IS_GPU) file(MAKE_DIRECTORY ${LIBC_INCLUDE_DIR}/gpu) add_gen_header( @@ -638,7 +638,7 @@ foreach(target IN LISTS all_install_header_targets) # The GPU optionally provides the supported declarations externally so # offloading languages like CUDA and OpenMP know what is supported by libc. We # install these in the compiler's resource directory at a preset location. 
- if(LIBC_TARGET_ARCHITECTURE_IS_GPU AND PACKAGE_VERSION) + if(LIBC_TARGET_OS_IS_GPU AND PACKAGE_VERSION) get_target_property(decls_file ${target} DECLS_FILE_PATH) if(NOT decls_file) continue() diff --git a/libc/lib/CMakeLists.txt b/libc/lib/CMakeLists.txt index c1a804232c1f5..615f4270646fb 100644 --- a/libc/lib/CMakeLists.txt +++ b/libc/lib/CMakeLists.txt @@ -2,11 +2,7 @@ set(libc_archive_targets "") set(libc_archive_names "") set(libc_archive_entrypoint_lists "") if(LLVM_LIBC_FULL_BUILD) - if(LIBC_TARGET_ARCHITECTURE_IS_GPU) - list(APPEND libc_archive_names cgpu mgpu) - else() - list(APPEND libc_archive_names c m) - endif() + list(APPEND libc_archive_names c m) list(APPEND libc_archive_targets libc libm) list(APPEND libc_archive_entrypoint_lists TARGET_LIBC_ENTRYPOINTS TARGET_LIBM_ENTRYPOINTS) @@ -40,6 +36,27 @@ foreach(archive IN ZIP_LISTS endif() endif() list(APPEND added_archive_targets ${archive_1}) + + # Add the offloading version of the library for offloading languages. These + # are installed in the standard search path separate from the other libraries. + if(LIBC_TARGET_OS_IS_GPU) + set(libc_gpu_archive_target ${archive_1}gpu) + set(libc_gpu_archive_name ${archive_0}gpu-${LIBC_TARGET_ARCHITECTURE}) + + add_gpu_entrypoint_library( + ${libc_gpu_archive_target} + DEPENDS + ${${archive_2}} + ) + set_target_properties( + ${libc_gpu_archive_target} + PROPERTIES + ARCHIVE_OUTPUT_NAME ${libc_gpu_archive_name} + ) + set_target_properties(${libc_gpu_archive_target} PROPERTIES + ARCHIVE_OUTPUT_DIRECTORY ${LLVM_LIBRARY_OUTPUT_INTDIR}) + list(APPEND added_gpu_archive_targets ${libc_gpu_archive_target}) + endif() endforeach() install( @@ -48,6 +65,14 @@ install( COMPONENT libc ) +if(LIBC_TARGET_OS_IS_GPU) + install( + TARGETS ${added_gpu_archive_targets} + ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX} + COMPONENT libc + ) +endif() + if(NOT LIBC_TARGET_OS_IS_BAREMETAL) # For now we will disable libc-startup installation for baremetal. 
The # correct way to do it would be to make a hookable startup for baremetal diff --git a/libc/src/__support/File/CMakeLists.txt b/libc/src/__support/File/CMakeLists.txt index b3e4cc4b02779..b7c0612096aa9 100644 --- a/libc/src/__support/File/CMakeLists.txt +++ b/libc/src/__support/File/CMakeLists.txt @@ -1,5 +1,5 @@ if(NOT (TARGET libc.src.__support.threads.mutex) - OR LIBC_TARGET_ARCHITECTURE_IS_GPU) + OR LIBC_TARGET_OS_IS_GPU) # Not all platforms have a mutex implementation. If mutex is unvailable, # we just skip everything about files. return() diff --git a/libc/src/__support/GPU/CMakeLists.txt b/libc/src/__support/GPU/CMakeLists.txt index 5a899215f4b6e..d7ebd3cab7abe 100644 --- a/libc/src/__support/GPU/CMakeLists.txt +++ b/libc/src/__support/GPU/CMakeLists.txt @@ -1,4 +1,4 @@ -if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU) +if(NOT LIBC_TARGET_OS_IS_GPU) return() endif() diff --git a/libc/src/__support/OSUtil/CMakeLists.txt b/libc/src/__support/OSUtil/CMakeLists.txt index c19677582643e..ca3b3bf1263e0 100644 --- a/libc/src/__support/OSUtil/CMakeLists.txt +++ b/libc/src/__support/OSUtil/CMakeLists.txt @@ -9,7 +9,7 @@ if(NOT TARGET ${target_os_util}) endif() # The OSUtil is an object library in GPU mode. 
-if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU) +if(NOT LIBC_TARGET_OS_IS_GPU) add_header_library( osutil HDRS diff --git a/libc/src/__support/RPC/CMakeLists.txt b/libc/src/__support/RPC/CMakeLists.txt index b44a65b3732e9..183fc6f8683e0 100644 --- a/libc/src/__support/RPC/CMakeLists.txt +++ b/libc/src/__support/RPC/CMakeLists.txt @@ -1,4 +1,4 @@ -if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU) +if(NOT LIBC_TARGET_OS_IS_GPU) return() endif() diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index 05ce51e8fc650..33dc1fc97c568 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -1,6 +1,9 @@ add_subdirectory(generic) if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_ARCHITECTURE}) add_subdirectory(${LIBC_TARGET_ARCHITECTURE}) +elseif(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS}) + # TODO: We should split this into 'nvptx' and 'amdgpu' for the GPU build. + add_subdirectory(${LIBC_TARGET_OS}) endif() function(add_math_entrypoint_object name) @@ -8,6 +11,7 @@ function(add_math_entrypoint_object name) # that first and return early if we are able to add an alias target for the # machine specific implementation. get_fq_target_name("${LIBC_TARGET_ARCHITECTURE}.${name}" fq_machine_specific_target_name) + get_fq_target_name("${LIBC_TARGET_OS}.${name}" fq_os_specific_target_name) if(TARGET ${fq_machine_specific_target_name}) add_entrypoint_object( ${name} @@ -16,17 +20,25 @@ function(add_math_entrypoint_object name) .${LIBC_TARGET_ARCHITECTURE}.${name} ) return() + elseif(TARGET ${fq_os_specific_target_name}) + add_entrypoint_object( + ${name} + ALIAS + DEPENDS + .${LIBC_TARGET_OS}.${name} + ) + return() endif() # The GPU optionally depends on vendor libraries. If we emitted one of these # entrypoints it means the user requested it and we should use it instead. 
- get_fq_target_name("${LIBC_TARGET_ARCHITECTURE}.vendor.${name}" fq_vendor_specific_target_name) + get_fq_target_name("${LIBC_TARGET_OS}.vendor.${name}" fq_vendor_specific_target_name) if(TARGET ${fq_vendor_specific_target_name}) add_entrypoint_object( ${name} ALIAS DEPENDS - .${LIBC_TARGET_ARCHITECTURE}.vendor.${name} + .${LIBC_TARGET_OS}.vendor.${name} VENDOR ) return() diff --git a/libc/src/math/gpu/vendor/CMakeLists.txt b/libc/src/math/gpu/vendor/CMakeLists.txt index f699ca103b5f8..36087ade63bfc 100644 --- a/libc/src/math/gpu/vendor/CMakeLists.txt +++ b/libc/src/math/gpu/vendor/CMakeLists.txt @@ -10,7 +10,6 @@ else() "functions will be an external reference to the vendor libraries.") endif() -find_package(CUDAToolkit QUIET) if(CUDAToolkit_FOUND) set(libdevice_path ${CUDAToolkit_BIN_DIR}/../nvvm/libdevice/libdevice.10.bc) if (EXISTS ${libdevice_path}) diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt index 380474ce27118..bb8e41606c5df 100644 --- a/libc/src/stdio/CMakeLists.txt +++ b/libc/src/stdio/CMakeLists.txt @@ -22,7 +22,7 @@ if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS}) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS}) endif() -if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU) +if(NOT LIBC_TARGET_OS_IS_GPU) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/generic) endif() diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt index a4d51fb9a11ee..ce08635df3145 100644 --- a/libc/src/stdlib/CMakeLists.txt +++ b/libc/src/stdlib/CMakeLists.txt @@ -316,7 +316,7 @@ if(LLVM_LIBC_INCLUDE_SCUDO) DEPENDS ${SCUDO_DEPS} ) -elseif(LIBC_TARGET_ARCHITECTURE_IS_GPU) +elseif(LIBC_TARGET_OS_IS_GPU) add_entrypoint_external( calloc ) @@ -397,7 +397,7 @@ add_entrypoint_object( .${LIBC_TARGET_OS}.abort ) -if(LIBC_TARGET_ARCHITECTURE_IS_GPU) +if(LIBC_TARGET_OS_IS_GPU) add_entrypoint_object( malloc ALIAS diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt index 6daaf1998ea7b..1c893280e8a3c 100644 
--- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -501,7 +501,7 @@ if(${LIBC_TARGET_ARCHITECTURE_IS_X86}) add_bcmp(bcmp_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512BW) add_bcmp(bcmp_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) add_bcmp(bcmp) -elseif(LIBC_TARGET_ARCHITECTURE_IS_GPU) +elseif(LIBC_TARGET_OS_IS_GPU) add_bcmp(bcmp) else() add_bcmp(bcmp_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) @@ -530,7 +530,7 @@ if(${LIBC_TARGET_ARCHITECTURE_IS_X86}) add_bzero(bzero_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F) add_bzero(bzero_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) add_bzero(bzero) -elseif(LIBC_TARGET_ARCHITECTURE_IS_GPU) +elseif(LIBC_TARGET_OS_IS_GPU) add_bzero(bzero) else() add_bzero(bzero_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) @@ -562,7 +562,7 @@ if(${LIBC_TARGET_ARCHITECTURE_IS_X86}) elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64}) add_memcmp(memcmp_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) add_memcmp(memcmp) -elseif(LIBC_TARGET_ARCHITECTURE_IS_GPU) +elseif(LIBC_TARGET_OS_IS_GPU) add_memcmp(memcmp) else() add_memcmp(memcmp_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) @@ -598,7 +598,7 @@ elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64}) add_memcpy(memcpy_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE} MLLVM_COMPILE_OPTIONS "-tail-merge-threshold=0") add_memcpy(memcpy MLLVM_COMPILE_OPTIONS "-tail-merge-threshold=0") -elseif(LIBC_TARGET_ARCHITECTURE_IS_GPU) +elseif(LIBC_TARGET_OS_IS_GPU) add_memcpy(memcpy) else() add_memcpy(memcpy_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) @@ -632,7 +632,7 @@ elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64}) add_memmove(memmove_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE} MLLVM_COMPILE_OPTIONS "-tail-merge-threshold=0") add_memmove(memmove MLLVM_COMPILE_OPTIONS "-tail-merge-threshold=0") -elseif(LIBC_TARGET_ARCHITECTURE_IS_GPU) 
+elseif(LIBC_TARGET_OS_IS_GPU) add_memmove(memmove) else() add_memmove(memmove_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) @@ -667,7 +667,7 @@ elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64}) add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE} MLLVM_COMPILE_OPTIONS "-tail-merge-threshold=0") add_memset(memset MLLVM_COMPILE_OPTIONS "-tail-merge-threshold=0") -elseif(LIBC_TARGET_ARCHITECTURE_IS_GPU) +elseif(LIBC_TARGET_OS_IS_GPU) add_memset(memset) else() add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) diff --git a/libc/startup/gpu/CMakeLists.txt b/libc/startup/gpu/CMakeLists.txt index fa7f69f19520c..6f67fa9ff44f7 100644 --- a/libc/startup/gpu/CMakeLists.txt +++ b/libc/startup/gpu/CMakeLists.txt @@ -28,33 +28,24 @@ function(add_startup_object name) ) endfunction() -if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU) - add_subdirectory(amdgpu) - - add_startup_object( - crt1 - ALIAS - DEPENDS - .amdgpu.crt1 - ) -elseif(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX) - add_subdirectory(nvptx) - - add_startup_object( - crt1 - ALIAS - DEPENDS - .nvptx.crt1 - ) -else() - # Skip building the startup code if there are no supported GPUs. 
- message(STATUS "Skipping startup for gpu target, no GPUs were detected") - return() +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_ARCHITECTURE}) + add_subdirectory(${LIBC_TARGET_ARCHITECTURE}) endif() +add_startup_object( + crt1 + ALIAS + DEPENDS + .${LIBC_TARGET_ARCHITECTURE}.crt1 +) + add_custom_target(libc-startup) set(startup_components crt1) foreach(target IN LISTS startup_components) set(fq_target_name libc.startup.gpu.${target}) add_dependencies(libc-startup ${fq_target_name}) + install(FILES $ + DESTINATION ${LIBC_INSTALL_LIBRARY_DIR} + RENAME $ + COMPONENT libc) endforeach() diff --git a/libc/startup/gpu/amdgpu/CMakeLists.txt b/libc/startup/gpu/amdgpu/CMakeLists.txt index c9d0ee2fd0e9a..3ac104ee8ba94 100644 --- a/libc/startup/gpu/amdgpu/CMakeLists.txt +++ b/libc/startup/gpu/amdgpu/CMakeLists.txt @@ -1,6 +1,5 @@ add_startup_object( crt1 - NO_GPU_BUNDLE # Compile this file directly without special GPU handling. SRC start.cpp DEPENDS @@ -11,17 +10,5 @@ add_startup_object( COMPILE_OPTIONS -ffreestanding # To avoid compiler warnings about calling the main function. -fno-builtin - -mcode-object-version=${LIBC_GPU_CODE_OBJECT_VERSION} # Manually set the ABI. ) get_fq_target_name(crt1 fq_name) - -# Ensure that clang uses the correct linker for this object type. -target_link_libraries( - ${fq_name} - PUBLIC - "-mcpu=${LIBC_GPU_TARGET_ARCHITECTURE}" - "--target=${LIBC_GPU_TARGET_TRIPLE}" - "-flto" - "-Wl,-mllvm,-amdgpu-lower-global-ctor-dtor=0" - "-Wl,-mllvm,-amdhsa-code-object-version=${LIBC_GPU_CODE_OBJECT_VERSION}" -) diff --git a/libc/startup/gpu/nvptx/CMakeLists.txt b/libc/startup/gpu/nvptx/CMakeLists.txt index 23a54516cc982..3ac104ee8ba94 100644 --- a/libc/startup/gpu/nvptx/CMakeLists.txt +++ b/libc/startup/gpu/nvptx/CMakeLists.txt @@ -1,6 +1,5 @@ add_startup_object( crt1 - NO_GPU_BUNDLE # Compile this file directly without special GPU handling. 
SRC start.cpp DEPENDS @@ -13,11 +12,3 @@ add_startup_object( -fno-builtin ) get_fq_target_name(crt1 fq_name) - -# Ensure that clang uses the correct linker for this object type. -target_link_libraries(${fq_name} - PUBLIC - "-march=${LIBC_GPU_TARGET_ARCHITECTURE}" - "--target=${LIBC_GPU_TARGET_TRIPLE}" - "--cuda-path=${LIBC_CUDA_ROOT}" -) diff --git a/libc/test/CMakeLists.txt b/libc/test/CMakeLists.txt index f22f2b183aca9..745a9a04b4af8 100644 --- a/libc/test/CMakeLists.txt +++ b/libc/test/CMakeLists.txt @@ -8,9 +8,9 @@ add_custom_target(libc-long-running-tests) add_subdirectory(UnitTest) -if(LIBC_TARGET_ARCHITECTURE_IS_GPU AND - (NOT TARGET libc.utils.gpu.loader OR NOT TARGET libc.startup.gpu.crt1)) - message(WARNING "Cannot build libc GPU tests, missing loader implementation") +if(LIBC_TARGET_OS_IS_GPU AND + (NOT TARGET libc.utils.gpu.loader OR LIBC_GPU_TESTS_DISABLED)) + message(WARNING "Cannot build libc GPU tests, missing loader or architecture") return() endif() diff --git a/libc/test/IntegrationTest/CMakeLists.txt b/libc/test/IntegrationTest/CMakeLists.txt index dca4c5a6f1b14..4f31f10b29f0b 100644 --- a/libc/test/IntegrationTest/CMakeLists.txt +++ b/libc/test/IntegrationTest/CMakeLists.txt @@ -1,21 +1,5 @@ -if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU) - set(TEST_COMPILE_FLAGS - -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} - -emit-llvm # AMDGPU's intermediate object file format is bitcode. - --target=${LIBC_GPU_TARGET_TRIPLE} - -mcode-object-version=${LIBC_GPU_CODE_OBJECT_VERSION} # Manually set the ABI. - ) -elseif(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX) - set(TEST_COMPILE_FLAGS - -march=${LIBC_GPU_TARGET_ARCHITECTURE} - --target=${LIBC_GPU_TARGET_TRIPLE} - --cuda-path=${LIBC_CUDA_ROOT} - ) -endif() - add_object_library( test - NO_GPU_BUNDLE # Compile this file directly without special GPU handling. 
SRCS test.cpp COMPILE_OPTIONS diff --git a/libc/test/UnitTest/CMakeLists.txt b/libc/test/UnitTest/CMakeLists.txt index 4a615d4bd5e1c..4668f0061975f 100644 --- a/libc/test/UnitTest/CMakeLists.txt +++ b/libc/test/UnitTest/CMakeLists.txt @@ -12,7 +12,7 @@ function(add_unittest_framework_library name) endif() # The Nvidia 'nvlink' linker does not support static libraries. - if(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX) + if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) set(library_type OBJECT) else() set(library_type STATIC) diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt index 9801621e6b399..53fa1323d18b7 100644 --- a/libc/test/src/__support/CMakeLists.txt +++ b/libc/test/src/__support/CMakeLists.txt @@ -1,7 +1,7 @@ add_custom_target(libc-support-tests) # FIXME: These tests are currently broken on the GPU. -if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU) +if(NOT LIBC_TARGET_OS_IS_GPU) add_libc_test( blockstore_test SUITE @@ -76,7 +76,7 @@ add_libc_test( ) # The GPU does not support varargs currently. -if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU) +if(NOT LIBC_TARGET_OS_IS_GPU) add_libc_test( arg_list_test SUITE @@ -88,8 +88,7 @@ if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU) ) endif() -# FIXME: Crash in NVPTX target lowering for calls -if(NOT LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX) +if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX) add_libc_test( uint_test SUITE @@ -159,29 +158,33 @@ add_libc_test( libc.src.__support.memory_size ) -add_executable( - libc_str_to_float_comparison_test - str_to_float_comparison_test.cpp -) +# FIXME: We shouldn't have regular executables created because we could be +# cross-compiling the tests and running through an emulator. 
+if(NOT LIBC_TARGET_OS_IS_GPU) + add_executable( + libc_str_to_float_comparison_test + str_to_float_comparison_test.cpp + ) -target_link_libraries(libc_str_to_float_comparison_test - PRIVATE - "${LIBC_TARGET}" -) + target_link_libraries(libc_str_to_float_comparison_test + PRIVATE + "${LIBC_TARGET}" + ) -add_executable( - libc_system_str_to_float_comparison_test - str_to_float_comparison_test.cpp -) + add_executable( + libc_system_str_to_float_comparison_test + str_to_float_comparison_test.cpp + ) -set(float_test_file ${CMAKE_CURRENT_SOURCE_DIR}/str_to_float_comparison_data.txt) + set(float_test_file ${CMAKE_CURRENT_SOURCE_DIR}/str_to_float_comparison_data.txt) -add_custom_command(TARGET libc_str_to_float_comparison_test - POST_BUILD - COMMAND $ ${float_test_file} - DEPENDS ${float_test_file} - COMMENT "Test the strtof and strtod implementations against precomputed results." - VERBATIM) + add_custom_command(TARGET libc_str_to_float_comparison_test + POST_BUILD + COMMAND $ ${float_test_file} + DEPENDS ${float_test_file} + COMMENT "Test the strtof and strtod implementations against precomputed results." 
+ VERBATIM) +endif() add_subdirectory(CPP) add_subdirectory(File) diff --git a/libc/test/src/__support/CPP/CMakeLists.txt b/libc/test/src/__support/CPP/CMakeLists.txt index 6927579289bc2..d7f332f5b0fbd 100644 --- a/libc/test/src/__support/CPP/CMakeLists.txt +++ b/libc/test/src/__support/CPP/CMakeLists.txt @@ -64,7 +64,7 @@ add_libc_test( # This test fails with invalid address space operations on sm_60 -if(NOT LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX) +if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX) add_libc_test( atomic_test SUITE diff --git a/libc/test/src/__support/File/CMakeLists.txt b/libc/test/src/__support/File/CMakeLists.txt index f193480c60c2b..9191469b4927c 100644 --- a/libc/test/src/__support/File/CMakeLists.txt +++ b/libc/test/src/__support/File/CMakeLists.txt @@ -1,4 +1,4 @@ -if(NOT (TARGET libc.src.__support.threads.mutex) OR LIBC_TARGET_ARCHITECTURE_IS_GPU) +if(NOT (TARGET libc.src.__support.threads.mutex) OR LIBC_TARGET_OS_IS_GPU) # Not all platforms have a mutex implementation. If mutex is unvailable, # we just skip everything about files. The GPU does not currently support # files as well. diff --git a/libc/test/src/errno/CMakeLists.txt b/libc/test/src/errno/CMakeLists.txt index 633d46a1f5f88..b73962fb4de4d 100644 --- a/libc/test/src/errno/CMakeLists.txt +++ b/libc/test/src/errno/CMakeLists.txt @@ -1,4 +1,4 @@ -if(NOT LLVM_LIBC_FULL_BUILD OR LIBC_TARGET_ARCHITECTURE_IS_GPU) +if(NOT LLVM_LIBC_FULL_BUILD OR LIBC_TARGET_OS_IS_GPU) return() endif() diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index 8c105515e3525..81d2e1e55b552 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -1,10 +1,14 @@ add_custom_target(libc-math-unittests) -add_library( - libc_math_test_utils - RandUtils.cpp - RandUtils.h -) +# FIXME: We shouldn't have regular libraries created because we could be +# cross-compiling the tests and running through an emulator. 
+if(NOT LIBC_TARGET_OS_IS_GPU) + add_library( + libc_math_test_utils + RandUtils.cpp + RandUtils.h + ) +endif() add_fp_unittest( cosf_test @@ -755,7 +759,7 @@ add_fp_unittest( ) # FIXME: These tests are currently broken for NVPTX. -if(NOT LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX) +if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX) add_fp_unittest( ilogb_test SUITE @@ -986,7 +990,7 @@ add_fp_unittest( ) # FIXME: These tests are currently broken on the GPU. -if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU) +if(NOT LIBC_TARGET_OS_IS_GPU) add_fp_unittest( fminf_test SUITE @@ -1231,7 +1235,7 @@ add_fp_unittest( ) # FIXME: These tests are currently spurious for NVPTX. -if(NOT LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX) +if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX) add_fp_unittest( nextafter_test SUITE diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index 1824c672cb974..2d24b5a76b013 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -819,7 +819,7 @@ add_fp_unittest( ) # FIXME: These tests are currently broken for NVPTX. -if(NOT LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX) +if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX) add_fp_unittest( ilogb_test SUITE @@ -1073,7 +1073,7 @@ add_fp_unittest( ) # FIXME: These tests are currently broken on the GPU. -if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU) +if(NOT LIBC_TARGET_OS_IS_GPU) add_fp_unittest( fminf_test SUITE @@ -1417,7 +1417,7 @@ add_fp_unittest( ) # FIXME: These tests are currently spurious for NVPTX. -if(NOT LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX) +if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX) add_fp_unittest( nextafter_test SUITE @@ -1465,7 +1465,7 @@ add_fp_unittest( ) # FIXME: These tests are currently spurious for the GPU. 
-if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU) +if(NOT LIBC_TARGET_OS_IS_GPU) add_fp_unittest( nexttoward_test SUITE diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt index 8db2293ab74a9..93c21aa994ef4 100644 --- a/libc/test/src/stdio/CMakeLists.txt +++ b/libc/test/src/stdio/CMakeLists.txt @@ -430,7 +430,7 @@ add_libc_test( # Create an output directory for any temporary test files. file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/testdata) -if(LIBC_TARGET_ARCHITECTURE_IS_GPU) +if(LIBC_TARGET_OS_IS_GPU) return() endif() diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt index da07dbbe79772..5826cfe8d4ca3 100644 --- a/libc/test/src/stdlib/CMakeLists.txt +++ b/libc/test/src/stdlib/CMakeLists.txt @@ -55,7 +55,7 @@ add_libc_test( ) # This fails on NVPTX where the output value is one-off of the expected value. -if(NOT LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX) +if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX) add_fp_unittest( strtod_test SUITE @@ -127,7 +127,7 @@ add_libc_test( ) # This fails on NVPTX where the output value is one-off of the expected value. -if(NOT LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX) +if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX) add_libc_test( strtold_test SUITE @@ -339,7 +339,7 @@ if(LLVM_LIBC_FULL_BUILD) ) # Only the GPU has an in-tree 'malloc' implementation. 
- if(LIBC_TARGET_ARCHITECTURE_IS_GPU) + if(LIBC_TARGET_OS_IS_GPU) add_libc_test( malloc_test HERMETIC_TEST_ONLY diff --git a/libc/test/utils/UnitTest/CMakeLists.txt b/libc/test/utils/UnitTest/CMakeLists.txt index 6f61e0ffefb00..3b917e06cde21 100644 --- a/libc/test/utils/UnitTest/CMakeLists.txt +++ b/libc/test/utils/UnitTest/CMakeLists.txt @@ -1,4 +1,4 @@ -if(LIBC_TARGET_ARCHITECTURE_IS_GPU) +if(LIBC_TARGET_OS_IS_GPU) return() endif() diff --git a/libc/utils/CMakeLists.txt b/libc/utils/CMakeLists.txt index 9754dcf3854aa..7bf02a4af7dea 100644 --- a/libc/utils/CMakeLists.txt +++ b/libc/utils/CMakeLists.txt @@ -1,6 +1,6 @@ if(LLVM_INCLUDE_TESTS) add_subdirectory(MPFRWrapper) endif() -if(LIBC_TARGET_ARCHITECTURE_IS_GPU) +if(LIBC_TARGET_OS_IS_GPU) add_subdirectory(gpu) endif() diff --git a/libc/utils/MPFRWrapper/CMakeLists.txt b/libc/utils/MPFRWrapper/CMakeLists.txt index adc073c9a91f5..6f44ca0d786c8 100644 --- a/libc/utils/MPFRWrapper/CMakeLists.txt +++ b/libc/utils/MPFRWrapper/CMakeLists.txt @@ -24,6 +24,6 @@ if(LIBC_TESTS_CAN_USE_MPFR) target_link_directories(libcMPFRWrapper PUBLIC ${LLVM_LIBC_MPFR_INSTALL_PATH}/lib) endif() target_link_libraries(libcMPFRWrapper PUBLIC LibcFPTestHelpers.unit LibcTest.unit mpfr gmp) -elseif(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU) +elseif(NOT LIBC_TARGET_OS_IS_GPU) message(WARNING "Math tests using MPFR will be skipped.") endif() diff --git a/libc/utils/gpu/CMakeLists.txt b/libc/utils/gpu/CMakeLists.txt index 7c15f36052cf3..4d1ebcfb9f8e6 100644 --- a/libc/utils/gpu/CMakeLists.txt +++ b/libc/utils/gpu/CMakeLists.txt @@ -1,2 +1,4 @@ add_subdirectory(server) -add_subdirectory(loader) +if(LIBC_TARGET_OS_IS_GPU) + add_subdirectory(loader) +endif() diff --git a/libc/utils/gpu/loader/CMakeLists.txt b/libc/utils/gpu/loader/CMakeLists.txt index f195b887c9af6..189460bb02e6e 100644 --- a/libc/utils/gpu/loader/CMakeLists.txt +++ b/libc/utils/gpu/loader/CMakeLists.txt @@ -1,31 +1,30 @@ add_library(gpu_loader OBJECT Main.cpp) + 
target_include_directories(gpu_loader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${LIBC_SOURCE_DIR}/include ${LIBC_SOURCE_DIR} ) +# This utility needs to be compiled for the host system when cross compiling. +if(LLVM_RUNTIMES_TARGET OR LIBC_TARGET_TRIPLE) + target_compile_options(gpu_loader PUBLIC --target=${LLVM_HOST_TRIPLE}) + target_link_libraries(gpu_loader PUBLIC "--target=${LLVM_HOST_TRIPLE}") +endif() + find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm) -if(hsa-runtime64_FOUND) +if(hsa-runtime64_FOUND AND LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) add_subdirectory(amdgpu) -else() +elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) message(STATUS "Skipping HSA loader for gpu target, no HSA was detected") endif() -find_package(CUDAToolkit QUIET) # The CUDA loader requires LLVM to traverse the ELF image for symbols. find_package(LLVM QUIET) -if(CUDAToolkit_FOUND AND LLVM_FOUND AND - "${CUDAToolkit_VERSION}" VERSION_GREATER_EQUAL "11.2") +if(CUDAToolkit_FOUND AND LLVM_FOUND AND LIBC_TARGET_ARCHITECTURE_IS_NVPTX) add_subdirectory(nvptx) -else() - if("${CUDAToolkit_VERSION}" VERSION_LESS "11.2") - message(WARNING - "Skipping CUDA loader for gpu target, CUDA must be version 11.2 or later. - Found CUDA Version ${CUDAToolkit_VERSION}") - else() - message(STATUS "Skipping CUDA loader for gpu target, no CUDA was detected") - endif() +elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) + message(STATUS "Skipping CUDA loader for gpu target, no CUDA was detected") endif() # Add a custom target to be used for testing. 
@@ -37,20 +36,31 @@ if(LIBC_GPU_LOADER_EXECUTABLE) PROPERTIES EXECUTABLE "${LIBC_GPU_LOADER_EXECUTABLE}" ) -elseif(TARGET amdhsa_loader AND LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU) +elseif(TARGET amdhsa-loader AND LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) add_custom_target(libc.utils.gpu.loader) - add_dependencies(libc.utils.gpu.loader amdhsa_loader) + add_dependencies(libc.utils.gpu.loader amdhsa-loader) set_target_properties( libc.utils.gpu.loader PROPERTIES - EXECUTABLE "$" + TARGET amdhsa-loader + EXECUTABLE "$" ) -elseif(TARGET nvptx_loader AND LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX) +elseif(TARGET nvptx-loader AND LIBC_TARGET_ARCHITECTURE_IS_NVPTX) add_custom_target(libc.utils.gpu.loader) - add_dependencies(libc.utils.gpu.loader nvptx_loader) + add_dependencies(libc.utils.gpu.loader nvptx-loader) set_target_properties( libc.utils.gpu.loader PROPERTIES - EXECUTABLE "$" + TARGET nvptx-loader + EXECUTABLE "$" ) endif() + +if(TARGET libc.utils.gpu.loader) + get_target_property(gpu_loader_tgt libc.utils.gpu.loader "TARGET") + if(gpu_loader_tgt) + install(TARGETS ${gpu_loader_tgt} + DESTINATION ${CMAKE_INSTALL_BINDIR} + COMPONENT libc) + endif() +endif() diff --git a/libc/utils/gpu/loader/amdgpu/CMakeLists.txt b/libc/utils/gpu/loader/amdgpu/CMakeLists.txt index 8e9c9a2bdc7d2..b99319f504011 100644 --- a/libc/utils/gpu/loader/amdgpu/CMakeLists.txt +++ b/libc/utils/gpu/loader/amdgpu/CMakeLists.txt @@ -1,7 +1,7 @@ -add_executable(amdhsa_loader Loader.cpp) -add_dependencies(amdhsa_loader libc.src.__support.RPC.rpc) +add_executable(amdhsa-loader Loader.cpp) +add_dependencies(amdhsa-loader libc.src.__support.RPC.rpc) -target_link_libraries(amdhsa_loader +target_link_libraries(amdhsa-loader PRIVATE hsa-runtime64::hsa-runtime64 gpu_loader diff --git a/libc/utils/gpu/loader/nvptx/CMakeLists.txt b/libc/utils/gpu/loader/nvptx/CMakeLists.txt index 0c76c49fa3098..e76362a1e8cca 100644 --- a/libc/utils/gpu/loader/nvptx/CMakeLists.txt +++ b/libc/utils/gpu/loader/nvptx/CMakeLists.txt @@ 
-1,11 +1,11 @@ -add_executable(nvptx_loader Loader.cpp) -add_dependencies(nvptx_loader libc.src.__support.RPC.rpc) +add_executable(nvptx-loader Loader.cpp) +add_dependencies(nvptx-loader libc.src.__support.RPC.rpc) if(NOT LLVM_ENABLE_RTTI) - target_compile_options(nvptx_loader PRIVATE -fno-rtti) + target_compile_options(nvptx-loader PRIVATE -fno-rtti) endif() -target_include_directories(nvptx_loader PRIVATE ${LLVM_INCLUDE_DIRS}) -target_link_libraries(nvptx_loader +target_include_directories(nvptx-loader PRIVATE ${LLVM_INCLUDE_DIRS}) +target_link_libraries(nvptx-loader PRIVATE gpu_loader llvmlibc_rpc_server diff --git a/libc/utils/gpu/server/CMakeLists.txt b/libc/utils/gpu/server/CMakeLists.txt index 3d9b2bcab4dbc..94cdfe5bf6521 100644 --- a/libc/utils/gpu/server/CMakeLists.txt +++ b/libc/utils/gpu/server/CMakeLists.txt @@ -5,12 +5,21 @@ target_include_directories(llvmlibc_rpc_server PRIVATE ${LIBC_SOURCE_DIR}) target_include_directories(llvmlibc_rpc_server PUBLIC ${LIBC_SOURCE_DIR}/include) target_include_directories(llvmlibc_rpc_server PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + # Ignore unsupported clang attributes if we're using GCC. target_compile_options(llvmlibc_rpc_server PUBLIC $<$:-Wno-attributes>) target_compile_definitions(llvmlibc_rpc_server PUBLIC LIBC_NAMESPACE=${LIBC_NAMESPACE}) +# This utility needs to be compiled for the host system when cross compiling. +if(LLVM_RUNTIMES_TARGET OR LIBC_TARGET_TRIPLE) + target_compile_options(llvmlibc_rpc_server PUBLIC + --target=${LLVM_HOST_TRIPLE}) + target_link_libraries(llvmlibc_rpc_server PUBLIC + "--target=${LLVM_HOST_TRIPLE}") +endif() + # Install the server and associated header. 
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/rpc_server.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/gpu-none-llvm/ diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index dbd5fbf226bd5..f5f7d3f3253fd 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -175,7 +175,9 @@ else() foreach(_name ${LLVM_RUNTIME_TARGETS}) if("libc" IN_LIST RUNTIMES_${_name}_LLVM_ENABLE_RUNTIMES) set(NEED_LIBC_HDRGEN TRUE) - break() + if("${_name}" STREQUAL "amdgcn-amd-amdhsa" OR "${_name}" STREQUAL "nvptx64-nvidia-cuda") + set(LLVM_LIBC_GPU_BUILD ON) + endif() endif() endforeach() endif() diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index 486df22c2c1bb..4257083e53ad4 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -120,6 +120,13 @@ if( LLVM_ENABLE_ASSERTIONS ) endif() endif() +# If we are targeting a GPU architecture we want to ignore all the standard +# flag handling. +if("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^amdgcn" OR + "${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^nvptx64") + return() +endif() + if(LLVM_ENABLE_EXPENSIVE_CHECKS) add_compile_definitions(EXPENSIVE_CHECKS) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 8c48d85a4346f..9b5e758b6ede5 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -199,7 +199,7 @@ foreach(entry ${runtimes}) list(APPEND prefixes "LLVM_LIBC") list(APPEND prefixes "LIBC_") # The `libc` project may require '-DCUDAToolkit_ROOT' in GPU mode. 
- if(LIBC_GPU_BUILD OR LIBC_GPU_ARCHITECTURES) + if(LLVM_LIBC_GPU_BUILD) list(APPEND prefixes "CUDA") endif() endif() @@ -424,7 +424,7 @@ if(runtimes) endforeach() endif() if("libc" IN_LIST LLVM_ENABLE_PROJECTS AND - (LLVM_LIBC_FULL_BUILD OR LIBC_GPU_BUILD OR LIBC_GPU_ARCHITECTURES)) + (LLVM_LIBC_FULL_BUILD OR LLVM_LIBC_GPU_BUILD)) if(LIBC_HDRGEN_EXE) set(hdrgen_exe ${LIBC_HDRGEN_EXE}) else() @@ -441,7 +441,12 @@ if(runtimes) set(libc_cmake_args "-DLIBC_HDRGEN_EXE=${hdrgen_exe}" "-DLLVM_LIBC_FULL_BUILD=ON") list(APPEND extra_deps ${hdrgen_deps}) - if(LIBC_GPU_BUILD OR LIBC_GPU_ARCHITECTURES) + if(LLVM_LIBC_GPU_BUILD) + list(APPEND libc_cmake_args "-DLLVM_LIBC_GPU_BUILD=ON") + # The `libc` project may require '-DCUDAToolkit_ROOT' in GPU mode. + if(CUDAToolkit_ROOT) + list(APPEND libc_cmake_args "-DCUDAToolkit_ROOT=${CUDAToolkit_ROOT}") + endif() foreach(dep clang-offload-packager nvptx-arch amdgpu-arch) if(TARGET ${dep}) list(APPEND extra_deps ${dep}) diff --git a/openmp/libomptarget/CMakeLists.txt b/openmp/libomptarget/CMakeLists.txt index 17e61d0bc47dc..a74eff0c0bebf 100644 --- a/openmp/libomptarget/CMakeLists.txt +++ b/openmp/libomptarget/CMakeLists.txt @@ -119,14 +119,7 @@ endif() pythonize_bool(LIBOMPTARGET_OMPT_SUPPORT) -# Check if this build supports the GPU libc. 
-set(LIBC_GPU_SUPPORT FALSE) -if("libc" IN_LIST LLVM_ENABLE_RUNTIMES AND (LIBC_GPU_BUILD OR - LIBC_GPU_ARCHITECTURES)) - set(LIBC_GPU_SUPPORT TRUE) -endif() - -set(LIBOMPTARGET_GPU_LIBC_SUPPORT ${LIBC_GPU_SUPPORT} CACHE BOOL +set(LIBOMPTARGET_GPU_LIBC_SUPPORT ${LLVM_LIBC_GPU_BUILD} CACHE BOOL "Libomptarget support for the GPU libc") pythonize_bool(LIBOMPTARGET_GPU_LIBC_SUPPORT) diff --git a/openmp/libomptarget/plugins-nextgen/common/CMakeLists.txt b/openmp/libomptarget/plugins-nextgen/common/CMakeLists.txt index 8ae3ff2a6d291..085d443071650 100644 --- a/openmp/libomptarget/plugins-nextgen/common/CMakeLists.txt +++ b/openmp/libomptarget/plugins-nextgen/common/CMakeLists.txt @@ -73,8 +73,12 @@ elseif(${LIBOMPTARGET_GPU_LIBC_SUPPORT}) find_library(llvmlibc_rpc_server NAMES llvmlibc_rpc_server PATHS ${LIBOMPTARGET_LLVM_LIBRARY_DIR} NO_DEFAULT_PATH) if(llvmlibc_rpc_server) - target_link_libraries(PluginCommon PRIVATE llvmlibc_rpc_server) + target_link_libraries(PluginCommon PRIVATE ${llvmlibc_rpc_server}) target_compile_definitions(PluginCommon PRIVATE LIBOMPTARGET_RPC_SUPPORT) + # We may need to get the headers directly from the 'libc' source directory. + target_include_directories(PluginCommon PRIVATE + ${CMAKE_SOURCE_DIR}/../libc/utils/gpu/server + ${CMAKE_SOURCE_DIR}/../libc/include) endif() endif() diff --git a/openmp/libomptarget/plugins-nextgen/common/src/RPC.cpp b/openmp/libomptarget/plugins-nextgen/common/src/RPC.cpp index 54aced11b31c3..cb6a5086bc4dd 100644 --- a/openmp/libomptarget/plugins-nextgen/common/src/RPC.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/src/RPC.cpp @@ -18,7 +18,8 @@ #if __has_include() #include #elif defined(LIBOMPTARGET_RPC_SUPPORT) -#include +// Just pull this out of the source if available. 
+#include "rpc_server.h" #endif using namespace llvm; diff --git a/openmp/libomptarget/test/lit.cfg b/openmp/libomptarget/test/lit.cfg index 565556e64ff29..6c590603079c4 100644 --- a/openmp/libomptarget/test/lit.cfg +++ b/openmp/libomptarget/test/lit.cfg @@ -180,8 +180,12 @@ def remove_suffix_if_present(name): def add_libraries(source): if config.libomptarget_has_libc: - return source + " " + config.llvm_library_dir + "/libcgpu.a " + \ - config.llvm_library_intdir + "/libomptarget.devicertl.a" + if config.libomptarget_current_target.startswith('nvptx'): + return source + " " + config.llvm_library_dir + "/libcgpu-nvptx.a " + \ + config.llvm_library_intdir + "/libomptarget.devicertl.a" + elif config.libomptarget_current_target.startswith('amdgcn'): + return source + " " + config.llvm_library_dir + "/libcgpu-amdgpu.a " + \ + config.llvm_library_intdir + "/libomptarget.devicertl.a" return source + " " + config.llvm_library_intdir + "/libomptarget.devicertl.a" # substitutions