diff --git a/.circleci/config.yml b/.circleci/config.yml index 255feec7b..dd8a3a310 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -92,6 +92,9 @@ jobs: cmake_args: type: string default: "" + build_openssl: + type: boolean + default: false docker: - image: << parameters.docker_image >> steps: @@ -101,6 +104,19 @@ jobs: command: | apt-get update apt-get install -y build-essential cmake libibverbs-dev libssl-dev << parameters.apt_get >> + - when: + condition: << parameters.build_openssl >> + steps: + - run: + name: Install openssl + command: | + apt-get install -y wget perl + wget -q https://www.openssl.org/source/openssl-1.1.1b.tar.gz + tar -xzf openssl-1.1.1b.tar.gz + cd openssl-1.1.1b + ./config --prefix=/opt/openssl --openssldir=/opt/openssl/ssl + make -j 2 + make install - run: name: Install libuv command: | @@ -236,6 +252,13 @@ workflows: name: cuda10.1-all-transports docker_image: nvidia/cuda:10.1-devel-ubuntu18.04 cmake_args: -DUSE_IBVERBS=ON -DUSE_LIBUV=ON -DUSE_TCP_OPENSSL_LINK=ON -DUSE_CUDA=ON + - build: + name: cuda11.7-all-transports + apt_get: "gcc-9 g++-9" + docker_image: nvidia/cuda:11.7.1-devel-ubuntu22.04 + cmake_compiler: -DCMAKE_C_COMPILER=gcc-9 -DCMAKE_CXX_COMPILER=g++-9 -DCMAKE_CUDA_HOST_COMPILER=g++-9 + cmake_args: -DUSE_IBVERBS=ON -DUSE_LIBUV=ON -DUSE_TCP_OPENSSL_LINK=ON -DUSE_CUDA=ON -DGLOO_USE_CUDA_TOOLKIT=ON -DOPENSSL_ROOT_DIR=/opt/openssl/ + build_openssl: true - build: name: tsan-all-transports docker_image: ubuntu:18.04 diff --git a/CMakeLists.txt b/CMakeLists.txt index bb2f8b7d1..b0db65e69 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,6 +40,8 @@ option(USE_TCP_OPENSSL_LOAD "Build TCP-TLS transport with OpenSSL dynamically lo if(${USE_TCP_OPENSSL_LINK} AND ${USE_TCP_OPENSSL_LOAD}) message(FATAL_ERROR "USE_TCP_OPENSSL_LINK and USE_TCP_OPENSSL_LOAD are mutually exclusive") endif() +option(USE_CUDA "Build with CUDA support" OFF) +option(GLOO_USE_CUDA_TOOLKIT "Build CUDA with FindCUDAToolkit.cmake and 
enable_language(CUDA)" OFF) if(MSVC) message(STATUS "MSVC detected") diff --git a/cmake/Cuda.cmake b/cmake/Cuda.cmake index e11f8d47e..d2ca2122b 100644 --- a/cmake/Cuda.cmake +++ b/cmake/Cuda.cmake @@ -1,8 +1,6 @@ # Known NVIDIA GPU achitectures Gloo can be compiled for. # This list will be used for CUDA_ARCH_NAME = All option -set(gloo_known_gpu_archs "30 35 50 52 60 61 70") -set(gloo_known_gpu_archs7 "30 35 50 52") -set(gloo_known_gpu_archs8 "30 35 50 52 60 61") +set(gloo_known_gpu_archs "") ################################################################################ # Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME @@ -104,78 +102,88 @@ function(gloo_list_append_if_unique list) endfunction() ################################################################################ -# Short command for cuda compilation -# Usage: -# gloo_cuda_compile( ) -macro(gloo_cuda_compile objlist_variable) - foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG) - set(${var}_backup_in_cuda_compile_ "${${var}}") - endforeach() +### Non macro section +################################################################################ - if(APPLE) - list(APPEND CUDA_NVCC_FLAGS -Xcompiler -Wno-unused-function) - endif() +if(GLOO_USE_CUDA_TOOLKIT) + find_package(CUDAToolkit 7.0 REQUIRED) + set(GLOO_CUDA_VERSION ${CUDAToolkit_VERSION}) - cuda_compile(cuda_objcs ${ARGN}) + # Convert -O2 -Xcompiler="-O2 -Wall" to "-O2;-Xcompiler=-O2,-Wall" + separate_arguments(GLOO_NVCC_FLAGS UNIX_COMMAND "${CMAKE_CUDA_FLAGS}") + string(REPLACE " " "," GLOO_NVCC_FLAGS "${GLOO_NVCC_FLAGS}") - foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG) - set(${var} "${${var}_backup_in_cuda_compile_}") - unset(${var}_backup_in_cuda_compile_) - endforeach() + if(CUDA_USE_STATIC_CUDA_RUNTIME) + set(GLOO_CUDA_LIBRARIES CUDA::cudart_static) + else() + set(GLOO_CUDA_LIBRARIES CUDA::cudart) + endif() +else() + find_package(CUDA 7.0) + if(NOT CUDA_FOUND) + 
return() + endif() + set(GLOO_CUDA_VERSION ${CUDA_VERSION}) + set(GLOO_NVCC_FLAGS "${CUDA_NVCC_FLAGS}") - set(${objlist_variable} ${cuda_objcs}) -endmacro() + include_directories(SYSTEM ${CUDA_INCLUDE_DIRS}) + set(GLOO_CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY}) -################################################################################ -### Non macro section -################################################################################ + # If the project including us doesn't set any -std=xxx directly, we set it to C++11 here. + set(CUDA_PROPAGATE_HOST_FLAGS OFF) + if((NOT "${GLOO_NVCC_FLAGS}" MATCHES "-std=c\\+\\+") AND (NOT "${GLOO_NVCC_FLAGS}" MATCHES "-std=gnu\\+\\+")) + if(NOT MSVC) + gloo_list_append_if_unique(GLOO_NVCC_FLAGS "-std=c++11") + endif() + endif() -find_package(CUDA 7.0) -if(NOT CUDA_FOUND) - return() + mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD) + mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION) endif() set(HAVE_CUDA TRUE) -message(STATUS "CUDA detected: " ${CUDA_VERSION}) -if (${CUDA_VERSION} LESS 8.0) - set(gloo_known_gpu_archs ${gloo_known_gpu_archs7}) - list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED") - list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__") -elseif (${CUDA_VERSION} LESS 9.0) - set(gloo_known_gpu_archs ${gloo_known_gpu_archs8}) - list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED") - list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__") +message(STATUS "CUDA detected: " ${GLOO_CUDA_VERSION}) +if (${GLOO_CUDA_VERSION} LESS 9.0) + list(APPEND GLOO_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED") + list(APPEND GLOO_NVCC_FLAGS "-D__STRICT_ANSI__") else() - # CUDA 8 may complain that sm_20 is no longer supported. Suppress the warning for now. - list(APPEND CUDA_NVCC_FLAGS "-Wno-deprecated-gpu-targets") + # nvcc may complain that sm_xx is no longer supported. Suppress the warning for now. 
+ list(APPEND GLOO_NVCC_FLAGS "-Wno-deprecated-gpu-targets") +endif() + +if(GLOO_CUDA_VERSION VERSION_LESS 8.0) + set(gloo_known_gpu_archs "30 35 50 52") +elseif(GLOO_CUDA_VERSION VERSION_LESS 9.0) + set(gloo_known_gpu_archs "30 35 50 52 60 61") +elseif(GLOO_CUDA_VERSION VERSION_LESS 10.0) + set(gloo_known_gpu_archs "30 35 50 52 60 61 70") +elseif(GLOO_CUDA_VERSION VERSION_LESS 11.0) + set(gloo_known_gpu_archs "35 50 52 60 61 70 75") +elseif(GLOO_CUDA_VERSION VERSION_LESS 12.0) + set(gloo_known_gpu_archs "35 50 52 60 61 70 75 80 86") endif() -include_directories(SYSTEM ${CUDA_INCLUDE_DIRS}) -list(APPEND gloo_DEPENDENCY_LIBS ${CUDA_CUDART_LIBRARY}) +list(APPEND gloo_cuda_DEPENDENCY_LIBS ${GLOO_CUDA_LIBRARIES}) # Setting nvcc arch flags (or inherit if already set) -if (NOT ";${CUDA_NVCC_FLAGS};" MATCHES ";-gencode;") +if (NOT ";${GLOO_NVCC_FLAGS};" MATCHES ";-gencode;") gloo_select_nvcc_arch_flags(NVCC_FLAGS_EXTRA) - list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA}) + list(APPEND GLOO_NVCC_FLAGS ${NVCC_FLAGS_EXTRA}) message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA_readable}") endif() # Disable some nvcc diagnostic that apears in boost, glog, glags, opencv, etc. foreach(diag cc_clobber_ignored integer_sign_change useless_using_declaration set_but_not_used) - gloo_list_append_if_unique(CUDA_NVCC_FLAGS -Xcudafe --diag_suppress=${diag}) + gloo_list_append_if_unique(GLOO_NVCC_FLAGS -Xcudafe --diag_suppress=${diag}) endforeach() -# If the project including us doesn't set any -std=xxx directly, we set it to C++11 here. 
-set(CUDA_PROPAGATE_HOST_FLAGS OFF) -if((NOT "${CUDA_NVCC_FLAGS}" MATCHES "-std=c\\+\\+") AND (NOT "${CUDA_NVCC_FLAGS}" MATCHES "-std=gnu\\+\\+")) - if(NOT MSVC) - gloo_list_append_if_unique(CUDA_NVCC_FLAGS "-std=c++11") - endif() -endif() - if(NOT MSVC) - gloo_list_append_if_unique(CUDA_NVCC_FLAGS "-Xcompiler" "-fPIC") + gloo_list_append_if_unique(GLOO_NVCC_FLAGS "-Xcompiler" "-fPIC") endif() -mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD) -mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION) +if(GLOO_USE_CUDA_TOOLKIT) + # Convert list to space-separated string + string(REPLACE ";" " " CMAKE_CUDA_FLAGS "${GLOO_NVCC_FLAGS}") +else() + set(CUDA_NVCC_FLAGS "${GLOO_NVCC_FLAGS}") +endif() diff --git a/gloo/CMakeLists.txt b/gloo/CMakeLists.txt index 0b416e9cd..d5e6a1a55 100644 --- a/gloo/CMakeLists.txt +++ b/gloo/CMakeLists.txt @@ -140,7 +140,12 @@ configure_file(config.h.in config.h) add_library(gloo ${GLOO_STATIC_OR_SHARED} ${GLOO_SRCS}) if(USE_CUDA) - cuda_add_library(gloo_cuda ${GLOO_CUDA_SRCS} ${GLOO_STATIC_OR_SHARED}) + if(GLOO_USE_CUDA_TOOLKIT) + enable_language(CUDA) + add_library(gloo_cuda ${GLOO_STATIC_OR_SHARED} ${GLOO_CUDA_SRCS}) + else() + cuda_add_library(gloo_cuda ${GLOO_CUDA_SRCS} ${GLOO_STATIC_OR_SHARED}) + endif() target_link_libraries(gloo_cuda gloo ${gloo_cuda_DEPENDENCY_LIBS}) endif() if(USE_ROCM) diff --git a/gloo/benchmark/CMakeLists.txt b/gloo/benchmark/CMakeLists.txt index a470daa8a..74da2c04b 100644 --- a/gloo/benchmark/CMakeLists.txt +++ b/gloo/benchmark/CMakeLists.txt @@ -18,8 +18,8 @@ if(USE_CUDA) "${CMAKE_CURRENT_SOURCE_DIR}/runner.cc" ) - cuda_add_executable(benchmark_cuda ${GLOO_BENCHMARK_CUDA_SRCS}) - target_link_libraries(benchmark_cuda gloo_cuda) + add_executable(benchmark_cuda ${GLOO_BENCHMARK_CUDA_SRCS}) + target_link_libraries(benchmark_cuda gloo_cuda ${GLOO_CUDA_LIBRARIES}) if(GLOO_INSTALL) install(TARGETS benchmark_cuda DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) diff --git 
a/gloo/test/CMakeLists.txt b/gloo/test/CMakeLists.txt index 73ece2bfa..743e089ee 100644 --- a/gloo/test/CMakeLists.txt +++ b/gloo/test/CMakeLists.txt @@ -50,8 +50,13 @@ if(USE_CUDA) "${CMAKE_CURRENT_SOURCE_DIR}/main.cc" ) - cuda_add_executable(gloo_test_cuda ${GLOO_TEST_CUDA_SRCS}) - target_link_libraries(gloo_test_cuda gloo_cuda gtest OpenSSL::SSL OpenSSL::Crypto) + if(GLOO_USE_CUDA_TOOLKIT) + enable_language(CUDA) + add_executable(gloo_test_cuda ${GLOO_TEST_CUDA_SRCS}) + else() + cuda_add_executable(gloo_test_cuda ${GLOO_TEST_CUDA_SRCS}) + endif() + target_link_libraries(gloo_test_cuda gloo_cuda gtest OpenSSL::SSL OpenSSL::Crypto ${GLOO_CUDA_LIBRARIES}) endif() endif()