diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt index 3d77573661674..88f586344fed1 100644 --- a/libc/CMakeLists.txt +++ b/libc/CMakeLists.txt @@ -43,7 +43,7 @@ set(LIBC_NAMESPACE "__llvm_libc_${LLVM_VERSION_MAJOR}_${LLVM_VERSION_MINOR}_${LL CACHE STRING "The namespace to use to enclose internal implementations. Must start with '__llvm_libc'." ) -if(LLVM_LIBC_FULL_BUILD OR LIBC_GPU_BUILD OR LIBC_GPU_ARCHITECTURES) +if(LLVM_LIBC_FULL_BUILD OR LIBC_GPU_BUILD) if(NOT LIBC_HDRGEN_EXE) # We need to set up hdrgen first since other targets depend on it. add_subdirectory(utils/LibcTableGenUtil) @@ -65,7 +65,7 @@ if(("libc" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT LLVM_RUNTIMES_BUILD) OR # to build libc-hdrgen and return. # Always make the RPC server availible to other projects for GPU mode. - if(LIBC_GPU_BUILD OR LIBC_GPU_ARCHITECTURES) + if(LIBC_GPU_BUILD) add_subdirectory(utils/gpu/server) endif() return() diff --git a/libc/cmake/modules/LLVMLibCArchitectures.cmake b/libc/cmake/modules/LLVMLibCArchitectures.cmake index 623ed774be727..79020ee3a65bc 100644 --- a/libc/cmake/modules/LLVMLibCArchitectures.cmake +++ b/libc/cmake/modules/LLVMLibCArchitectures.cmake @@ -6,7 +6,7 @@ # platform. # ------------------------------------------------------------------------------ -if(LIBC_GPU_BUILD OR LIBC_GPU_ARCHITECTURES) +if(LIBC_GPU_BUILD) # We set the generic target and OS to "gpu" here. More specific defintions # for the exact target GPU are set up in prepare_libc_gpu_build.cmake. set(LIBC_TARGET_OS "gpu") diff --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake index ef1f24863f61a..9d524d55033a6 100644 --- a/libc/cmake/modules/LLVMLibCObjectRules.cmake +++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake @@ -100,44 +100,11 @@ endfunction() # for the features we wish to use on that target. The minimum PTX features used # here roughly corresponds to the CUDA 9.0 release. # Adjust as needed for desired PTX features. 
-function(get_nvptx_compile_options output_var gpu_arch) +function(get_nvptx_compile_options output_var) set(nvptx_options "") - list(APPEND nvptx_options "-march=${gpu_arch}") list(APPEND nvptx_options "-Wno-unknown-cuda-version") list(APPEND nvptx_options "SHELL:-mllvm -nvptx-emit-init-fini-kernel=false") - if(${gpu_arch} STREQUAL "sm_35") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_37") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_50") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_52") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_53") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_60") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_61") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_62") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_70") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_72") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_75") - list(APPEND nvptx_options "--cuda-feature=+ptx63") - elseif(${gpu_arch} STREQUAL "sm_80") - list(APPEND nvptx_options "--cuda-feature=+ptx72") - elseif(${gpu_arch} STREQUAL "sm_86") - list(APPEND nvptx_options "--cuda-feature=+ptx72") - elseif(${gpu_arch} STREQUAL "sm_89") - list(APPEND nvptx_options "--cuda-feature=+ptx72") - elseif(${gpu_arch} STREQUAL "sm_90") - list(APPEND nvptx_options "--cuda-feature=+ptx72") - else() - message(FATAL_ERROR "Unknown Nvidia GPU architecture '${gpu_arch}'") - endif() + list(APPEND nvptx_options "--cuda-feature=+ptx63") if(LIBC_CUDA_ROOT) list(APPEND nvptx_options "--cuda-path=${LIBC_CUDA_ROOT}") @@ -147,16 +114,16 @@ endfunction() # Build the object target for a single GPU arch. 
# Usage: -# _build_gpu_object_for_single_arch( +# _build_gpu_object_for_single_target( # -# +# # SRCS # HDRS # DEPENDS # COMPILE_OPTIONS # FLAGS # ) -function(_build_gpu_object_for_single_arch fq_target_name gpu_arch) +function(_build_gpu_object_for_single_target fq_target_name gpu_target) cmake_parse_arguments( "ADD_GPU_OBJ" "" # No optional arguments @@ -170,20 +137,16 @@ function(_build_gpu_object_for_single_arch fq_target_name gpu_arch) endif() set(compile_options ${ADD_GPU_OBJ_COMPILE_OPTIONS}) - # Derive the triple from the specified architecture. - if("${gpu_arch}" IN_LIST all_amdgpu_architectures) - set(gpu_target_triple ${AMDGPU_TARGET_TRIPLE}) - list(APPEND compile_options "-mcpu=${gpu_arch}") + if("${gpu_target}" STREQUAL ${AMDGPU_TARGET_TRIPLE}) list(APPEND compile_options "SHELL:-Xclang -mcode-object-version=none") list(APPEND compile_options "-emit-llvm") - elseif("${gpu_arch}" IN_LIST all_nvptx_architectures) - set(gpu_target_triple ${NVPTX_TARGET_TRIPLE}) - get_nvptx_compile_options(nvptx_options ${gpu_arch}) + elseif("${gpu_target}" STREQUAL ${NVPTX_TARGET_TRIPLE}) + get_nvptx_compile_options(nvptx_options) list(APPEND compile_options "${nvptx_options}") else() - message(FATAL_ERROR "Unknown GPU architecture '${gpu_arch}'") + message(FATAL_ERROR "Unknown GPU architecture '${gpu_target}'") endif() - list(APPEND compile_options "--target=${gpu_target_triple}") + list(APPEND compile_options "--target=${gpu_target}") # Build the library for this target architecture. We always emit LLVM-IR for # packaged GPU binaries. @@ -202,7 +165,7 @@ function(_build_gpu_object_for_single_arch fq_target_name gpu_arch) add_dependencies(${fq_target_name} ${ADD_GPU_OBJ_DEPENDS}) set_target_properties(${fq_target_name} PROPERTIES DEPS "${ADD_GPU_OBJ_DEPENDS}") endif() -endfunction(_build_gpu_object_for_single_arch) +endfunction(_build_gpu_object_for_single_target) # Build the object target for the GPU. 
# This compiles the target for all supported architectures and embeds it into @@ -232,13 +195,13 @@ function(_build_gpu_object_bundle fq_target_name) foreach(add_gpu_obj_src ${ADD_GPU_OBJ_SRCS}) # The packaged version will be built for every target GPU architecture. We do # this so we can support multiple accelerators on the same machine. - foreach(gpu_arch ${LIBC_GPU_ARCHITECTURES}) + foreach(gpu_target ${NVPTX_TARGET_TRIPLE} ${AMDGPU_TARGET_TRIPLE}) get_filename_component(src_name ${add_gpu_obj_src} NAME) - set(gpu_target_name ${fq_target_name}.${src_name}.${gpu_arch}) + set(gpu_target_name ${fq_target_name}.${src_name}.${gpu_target}) - _build_gpu_object_for_single_arch( + _build_gpu_object_for_single_target( ${gpu_target_name} - ${gpu_arch} + ${gpu_target} CXX_STANDARD ${ADD_GPU_OBJ_CXX_STANDARD} HDRS ${ADD_GPU_OBJ_HDRS} SRCS ${add_gpu_obj_src} @@ -249,15 +212,15 @@ function(_build_gpu_object_bundle fq_target_name) ) # Append this target to a list of images to package into a single binary. 
set(input_file $<TARGET_OBJECTS:${gpu_target_name}>) - if("${gpu_arch}" IN_LIST all_nvptx_architectures) + if("${gpu_target}" STREQUAL "${NVPTX_TARGET_TRIPLE}") get_nvptx_compile_options(nvptx_options ${gpu_arch}) string(REGEX MATCH "\\+ptx[0-9]+" nvptx_ptx_feature ${nvptx_options}) list(APPEND packager_images - --image=file=${input_file},arch=${gpu_arch},triple=${NVPTX_TARGET_TRIPLE},feature=${nvptx_ptx_feature}) + --image=file=${input_file},arch=generic,triple=${NVPTX_TARGET_TRIPLE},feature=${nvptx_ptx_feature}) else() list(APPEND packager_images - --image=file=${input_file},arch=${gpu_arch},triple=${AMDGPU_TARGET_TRIPLE}) - endif() + --image=file=${input_file},arch=generic,triple=${AMDGPU_TARGET_TRIPLE}) + endif() list(APPEND gpu_target_objects ${input_file}) endforeach() @@ -386,13 +349,14 @@ function(create_object_library fq_target_name) endif() # When the target for GPU is not bundled, internal_target_name is the same # as fq_targetname - _build_gpu_object_for_single_arch( + _build_gpu_object_for_single_target( ${internal_target_name} - ${LIBC_GPU_TARGET_ARCHITECTURE} + ${LIBC_GPU_TARGET_TRIPLE} SRCS ${ADD_OBJECT_SRCS} HDRS ${ADD_OBJECT_HDRS} CXX_STANDARD ${ADD_OBJECT_CXX_STANDARD} - COMPILE_OPTIONS ${compile_options} ${public_packaging_for_internal} + COMPILE_OPTIONS ${compile_options} -march=${LIBC_GPU_TARGET_ARCHITECTURE} + ${public_packaging_for_internal} DEPENDS ${fq_deps_list} ) else() @@ -598,12 +562,13 @@ function(create_entrypoint_object fq_target_name) DEPENDS ${full_deps_list} FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}" ) - _build_gpu_object_for_single_arch( + _build_gpu_object_for_single_target( ${internal_target_name} - ${LIBC_GPU_TARGET_ARCHITECTURE} + ${LIBC_GPU_TARGET_TRIPLE} SRCS ${ADD_ENTRYPOINT_OBJ_SRCS} HDRS ${ADD_ENTRYPOINT_OBJ_HDRS} - COMPILE_OPTIONS ${common_compile_options} + COMPILE_OPTIONS -march=${LIBC_GPU_TARGET_ARCHITECTURE} + ${common_compile_options} CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD} DEPENDS ${full_deps_list} FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}" diff --git 
a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake index 5b96c5e9f8c80..e30cc512a9885 100644 --- a/libc/cmake/modules/LLVMLibCTestRules.cmake +++ b/libc/cmake/modules/LLVMLibCTestRules.cmake @@ -477,8 +477,9 @@ function(add_integration_test test_name) -flto --target=${LIBC_GPU_TARGET_TRIPLE} -mcode-object-version=${LIBC_GPU_CODE_OBJECT_VERSION}) elseif(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX) - get_nvptx_compile_options(nvptx_options ${LIBC_GPU_TARGET_ARCHITECTURE}) + get_nvptx_compile_options(nvptx_options) target_compile_options(${fq_build_target_name} PRIVATE + -march=${LIBC_GPU_TARGET_ARCHITECTURE} -nogpulib ${nvptx_options} -fno-use-cxa-atexit --target=${LIBC_GPU_TARGET_TRIPLE}) endif() @@ -539,9 +540,10 @@ if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU) --target=${LIBC_GPU_TARGET_TRIPLE} -mcode-object-version=${LIBC_GPU_CODE_OBJECT_VERSION}) elseif(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX) - get_nvptx_compile_options(nvptx_options ${LIBC_GPU_TARGET_ARCHITECTURE}) + get_nvptx_compile_options(nvptx_options) list(APPEND LIBC_HERMETIC_TEST_COMPILE_OPTIONS - -nogpulib ${nvptx_options} -fno-use-cxa-atexit --target=${LIBC_GPU_TARGET_TRIPLE}) + -nogpulib ${nvptx_options} -fno-use-cxa-atexit + -march=${LIBC_GPU_TARGET_ARCHITECTURE} --target=${LIBC_GPU_TARGET_TRIPLE}) endif() # Rule to add a hermetic test. 
A hermetic test is one whose executable is fully diff --git a/libc/cmake/modules/prepare_libc_gpu_build.cmake b/libc/cmake/modules/prepare_libc_gpu_build.cmake index 2086175bae6c7..4fcfce7036719 100644 --- a/libc/cmake/modules/prepare_libc_gpu_build.cmake +++ b/libc/cmake/modules/prepare_libc_gpu_build.cmake @@ -13,8 +13,6 @@ set(all_nvptx_architectures "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62" "sm_70;sm_72;sm_75;sm_80;sm_86;sm_89;sm_90") set(all_gpu_architectures "${all_amdgpu_architectures};${all_nvptx_architectures}") -set(LIBC_GPU_ARCHITECTURES "all" CACHE STRING - "List of GPU architectures to build the libc for.") set(AMDGPU_TARGET_TRIPLE "amdgcn-amd-amdhsa") set(NVPTX_TARGET_TRIPLE "nvptx64-nvidia-cuda") @@ -54,17 +52,6 @@ foreach(arch_tool ${LIBC_NVPTX_ARCH} ${LIBC_AMDGPU_ARCH}) endforeach() list(REMOVE_DUPLICATES detected_gpu_architectures) -if(LIBC_GPU_ARCHITECTURES STREQUAL "all") - set(LIBC_GPU_ARCHITECTURES ${all_gpu_architectures}) -elseif(LIBC_GPU_ARCHITECTURES STREQUAL "native") - if(NOT detected_gpu_architectures) - message(FATAL_ERROR "No GPUs found on the system when using 'native'") - endif() - set(LIBC_GPU_ARCHITECTURES ${detected_gpu_architectures}) -endif() -message(STATUS "Building libc for the following GPU architecture(s): " - "${LIBC_GPU_ARCHITECTURES}") - # Identify the program used to package multiple images into a single binary. find_program(LIBC_CLANG_OFFLOAD_PACKAGER NAMES clang-offload-packager NO_DEFAULT_PATH @@ -98,16 +85,11 @@ elseif(detected_gpu_architectures) message(STATUS "Using GPU architecture detected on the system for testing: " "'${gpu_test_architecture}'") else() - list(LENGTH LIBC_GPU_ARCHITECTURES n_gpu_archs) - if (${n_gpu_archs} EQUAL 1) - set(gpu_test_architecture ${LIBC_GPU_ARCHITECTURES}) - message(STATUS "Using user-specified GPU architecture for testing: " - "'${gpu_test_architecture}'") - else() - message(STATUS "No GPU architecture set for testing. GPU tests will not be " - "availibe. 
Set 'LIBC_GPU_TEST_ARCHITECTURE' to override.") - return() - endif() - endif() + # FIXME: This logic is broken, just default to some value for now so it + # builds correctly. This will be reworked in the future. + list(GET all_gpu_architectures 0 gpu_test_architecture) + message(STATUS "No GPU architecture set for testing. GPU tests will not be " + "available. Set 'LIBC_GPU_TEST_ARCHITECTURE' to override.") endif() if("${gpu_test_architecture}" IN_LIST all_amdgpu_architectures) diff --git a/libc/docs/gpu/using.rst b/libc/docs/gpu/using.rst index 71f5e7ba20393..f4760f97afb96 100644 --- a/libc/docs/gpu/using.rst +++ b/libc/docs/gpu/using.rst @@ -16,10 +16,8 @@ LLVM's libc GPU support *must* be built with an up-to-date ``clang`` compiler due to heavy reliance on ``clang``'s GPU support. This can be done automatically using the ``LLVM_ENABLE_RUNTIMES=libc`` option. To enable libc for the GPU, enable the ``LIBC_GPU_BUILD`` option. By default, ``libcgpu.a`` will be built -using every supported GPU architecture. To restrict the number of architectures -build, either set ``LIBC_GPU_ARCHITECTURES`` to the list of desired -architectures manually or use ``native`` to detect the GPUs on your system. A -typical ``cmake`` configuration will look like this: +targeting the NVPTX and AMDGPU implementations. A typical ``cmake`` +configuration will look like this: .. 
code-block:: sh @@ -31,7 +29,6 @@ typical ``cmake`` configuration will look like this: -DLLVM_ENABLE_RUNTIMES="libc;openmp" \ -DCMAKE_BUILD_TYPE=<Debug|Release> \ # Select build type -DLIBC_GPU_BUILD=ON \ # Build in GPU mode - -DLIBC_GPU_ARCHITECTURES=all \ # Build all supported architectures -DCMAKE_INSTALL_PREFIX=<PATH> \ # Where 'libcgpu.a' will live $> ninja install diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 8c48d85a4346f..188755b466b3f 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -199,7 +199,7 @@ foreach(entry ${runtimes}) list(APPEND prefixes "LLVM_LIBC") list(APPEND prefixes "LIBC_") # The `libc` project may require '-DCUDAToolkit_ROOT' in GPU mode. - if(LIBC_GPU_BUILD OR LIBC_GPU_ARCHITECTURES) + if(LIBC_GPU_BUILD) list(APPEND prefixes "CUDA") endif() endif() @@ -424,7 +424,7 @@ if(runtimes) endforeach() endif() if("libc" IN_LIST LLVM_ENABLE_PROJECTS AND - (LLVM_LIBC_FULL_BUILD OR LIBC_GPU_BUILD OR LIBC_GPU_ARCHITECTURES)) + (LLVM_LIBC_FULL_BUILD OR LIBC_GPU_BUILD)) if(LIBC_HDRGEN_EXE) set(hdrgen_exe ${LIBC_HDRGEN_EXE}) else() @@ -441,7 +441,7 @@ if(runtimes) set(libc_cmake_args "-DLIBC_HDRGEN_EXE=${hdrgen_exe}" "-DLLVM_LIBC_FULL_BUILD=ON") list(APPEND extra_deps ${hdrgen_deps}) - if(LIBC_GPU_BUILD OR LIBC_GPU_ARCHITECTURES) + if(LIBC_GPU_BUILD) foreach(dep clang-offload-packager nvptx-arch amdgpu-arch) if(TARGET ${dep}) list(APPEND extra_deps ${dep}) diff --git a/openmp/libomptarget/CMakeLists.txt b/openmp/libomptarget/CMakeLists.txt index 17e61d0bc47dc..8e51e189ab953 100644 --- a/openmp/libomptarget/CMakeLists.txt +++ b/openmp/libomptarget/CMakeLists.txt @@ -121,8 +121,7 @@ pythonize_bool(LIBOMPTARGET_OMPT_SUPPORT) # Check if this build supports the GPU libc. set(LIBC_GPU_SUPPORT FALSE) -if("libc" IN_LIST LLVM_ENABLE_RUNTIMES AND (LIBC_GPU_BUILD OR - LIBC_GPU_ARCHITECTURES)) +if("libc" IN_LIST LLVM_ENABLE_RUNTIMES AND LIBC_GPU_BUILD) set(LIBC_GPU_SUPPORT TRUE) endif()