Skip to content

Commit

Permalink
[Release/1.9] Link whole CuDNN for CUDA-11.1 (#59873)
Browse files Browse the repository at this point in the history
* Move cublas dependency after CuDNN (#58287)

Summary:
Library linking order matters during static linking.
Not sure whether it's a bug or a feature, but if cublas is referenced
before CuDNN, it will be partially statically linked into the library,
even if it is not used.

Pull Request resolved: #58287

Reviewed By: janeyx99

Differential Revision: D28433165

Pulled By: malfet

fbshipit-source-id: 8dffa0533075126dc383428f838f7d048074205c

* [CMake] Split caffe2::cudnn into public and private (#59721)

Summary:
This is only important for builds where cuDNN is linked statically into libtorch_cpu.
Before this PR, PyTorch wheels often accidentally contained several partial copies of the cudnn_static library.
Splitting the interface into header-only (cudnn-public) and library+headers (cudnn-private) prevents that from happening.
Preliminary step towards enabling optional linking of the whole cudnn_library to work around the issue reported in #50153.

Pull Request resolved: #59721

Reviewed By: ngimel

Differential Revision: D29000967

Pulled By: malfet

fbshipit-source-id: f054df92b265e9494076ab16c247427b39da9336

* Add USE_WHOLE_CUDNN option (#59744)

Summary:
The USE_WHOLE_CUDNN option can only be turned on if USE_STATIC_CUDNN is enabled.

Next step after #59721 towards resolving the stripping of fast kernels reported in #50153.

Pull Request resolved: #59744

Reviewed By: seemethere, ngimel

Differential Revision: D29007314

Pulled By: malfet

fbshipit-source-id: 7091e299c0c6cc2a8aa82fbf49312cecf3bb861a

* [Binary] Link whole CuDNN for CUDA-11.1 (#59802)

Summary:
Fixes #50153

Pull Request resolved: #59802

Reviewed By: driazati, seemethere

Differential Revision: D29033537

Pulled By: malfet

fbshipit-source-id: e816fc71f273ae0b4ba8a0621d5368a2078561a1
  • Loading branch information
malfet committed Jun 11, 2021
1 parent 43c581a commit 90e6773
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 39 deletions.
9 changes: 8 additions & 1 deletion .circleci/scripts/binary_populate_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ if [[ ${DESIRED_CUDA} == "cpu" ]]; then
USE_GOLD_LINKER="ON"
fi

USE_WHOLE_CUDNN="OFF"
# Link whole cuDNN for CUDA-11.1 to include fp16 fast kernels
if [[ "$(uname)" == "Linux" && "${DESIRED_CUDA}" == "cu111" ]]; then
USE_WHOLE_CUDNN="ON"
fi

# Default to nightly, since that's where this normally uploads to
PIP_UPLOAD_FOLDER='nightly/'
# We put this here so that OVERRIDE_PACKAGE_VERSION below can read from it
Expand Down Expand Up @@ -176,7 +182,8 @@ export CIRCLE_BRANCH="$CIRCLE_BRANCH"
export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"
export USE_GOLD_LINKER="${USE_GOLD_LINKER}"
export USE_GLOO_WITH_OPENSSL=1
export USE_GLOO_WITH_OPENSSL="ON"
export USE_WHOLE_CUDNN="${USE_WHOLE_CUDNN}"
# =================== The above code will be executed inside Docker container ===================
EOL

Expand Down
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,9 @@ cmake_dependent_option(
cmake_dependent_option(
USE_STATIC_CUDNN "Use cuDNN static libraries" OFF
"USE_CUDNN" OFF)
cmake_dependent_option(
USE_WHOLE_CUDNN "Use whole-library linking for cuDNN" OFF
"USE_STATIC_CUDNN" OFF)
option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
option(USE_KINETO "Use Kineto profiling library" ON)
option(USE_CUPTI_SO "Use CUPTI as a shared library" OFF)
Expand Down
8 changes: 8 additions & 0 deletions caffe2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1468,6 +1468,10 @@ if(BUILD_SPLIT_CUDA)
target_link_libraries(
torch_cuda_cpp PRIVATE ${Caffe2_CUDA_DEPENDENCY_LIBS})
target_link_libraries(torch_cuda_cu PRIVATE torch_cuda_cpp)
if(USE_CUDNN)
target_link_libraries(
torch_cuda_cpp PRIVATE caffe2::cudnn-private)
endif()

# These public dependencies must go after the previous dependencies, as the
# order of the libraries in the linker call matters here when statically
Expand All @@ -1484,6 +1488,10 @@ elseif(USE_CUDA)
torch_cuda PRIVATE ${Caffe2_GPU_INCLUDE})
target_link_libraries(
torch_cuda PRIVATE ${Caffe2_CUDA_DEPENDENCY_LIBS})
if(USE_CUDNN)
target_link_libraries(
torch_cuda PRIVATE caffe2::cudnn-private)
endif()

# These public dependencies must go after the previous dependencies, as the
# order of the libraries in the linker call matters here when statically
Expand Down
2 changes: 1 addition & 1 deletion cmake/Dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1165,7 +1165,7 @@ if(USE_CUDA)
caffe2_update_option(USE_NVRTC OFF)
endif()
if(CAFFE2_USE_CUDNN)
list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cudnn)
list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cudnn-public)
else()
caffe2_update_option(USE_CUDNN OFF)
endif()
Expand Down
6 changes: 5 additions & 1 deletion cmake/Summary.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,12 @@ function(caffe2_print_configuration_summary)
get_target_property(__tmp caffe2::curand IMPORTED_LOCATION)
message(STATUS " curand library : ${__tmp}")
if(${USE_CUDNN})
get_target_property(__tmp caffe2::cudnn IMPORTED_LOCATION)
get_target_property(__tmp caffe2::cudnn-public INTERFACE_LINK_LIBRARIES)
message(STATUS " cuDNN library : ${__tmp}")
if(${CUDNN_STATIC})
get_target_property(__tmp caffe2::cudnn-private INTERFACE_LINK_LIBRARIES)
message(STATUS " cuDNN static library: ${__tmp}")
endif()
endif()
get_target_property(__tmp caffe2::nvrtc IMPORTED_LOCATION)
message(STATUS " nvrtc : ${__tmp}")
Expand Down
95 changes: 59 additions & 36 deletions cmake/public/cuda.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -272,20 +272,66 @@ else()
${LIBNVTOOLSEXT})
endif()

# cudnn
# cublas. CUDA_CUBLAS_LIBRARIES is actually a list, so we will make an
# interface library similar to cudart.
add_library(caffe2::cublas INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
set_property(
TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
"${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublas_static.a")
if(CUDA_VERSION VERSION_GREATER_EQUAL 10.1)
set_property(
TARGET caffe2::cublas APPEND PROPERTY INTERFACE_LINK_LIBRARIES
"${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublasLt_static.a")
# Add explicit dependency to cudart_static to fix
# libcublasLt_static.a.o): undefined reference to symbol 'cudaStreamWaitEvent'
# error adding symbols: DSO missing from command line
set_property(
TARGET caffe2::cublas APPEND PROPERTY INTERFACE_LINK_LIBRARIES
"${CUDA_cudart_static_LIBRARY}" rt dl)
endif()
else()
set_property(
TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
${CUDA_CUBLAS_LIBRARIES})
endif()
set_property(
TARGET caffe2::cublas PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${CUDA_INCLUDE_DIRS})

# cudnn public and private interfaces
# Static linking is controlled by the USE_STATIC_CUDNN environment variable.
# If the library is linked dynamically, then the private interface adds
# nothing to the link line (the public interface carries the library).
# If the library is linked statically:
# - the public interface only references the headers
# - the private interface contains the actual link instructions
if(CAFFE2_USE_CUDNN)
add_library(caffe2::cudnn UNKNOWN IMPORTED)
add_library(caffe2::cudnn-public INTERFACE IMPORTED)
set_property(
TARGET caffe2::cudnn PROPERTY IMPORTED_LOCATION
${CUDNN_LIBRARY_PATH})
TARGET caffe2::cudnn-public PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${CUDNN_INCLUDE_PATH})
add_library(caffe2::cudnn-private INTERFACE IMPORTED)
set_property(
TARGET caffe2::cudnn PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${CUDNN_INCLUDE_PATH})
TARGET caffe2::cudnn-private PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${CUDNN_INCLUDE_PATH})
if(CUDNN_STATIC AND NOT WIN32)
if(USE_WHOLE_CUDNN)
set_property(
TARGET caffe2::cudnn-private PROPERTY INTERFACE_LINK_LIBRARIES
"-Wl,--whole-archive,\"${CUDNN_LIBRARY_PATH}\" -Wl,--no-whole-archive")
else()
set_property(
TARGET caffe2::cudnn-private PROPERTY INTERFACE_LINK_LIBRARIES
${CUDNN_LIBRARY_PATH})
endif()
set_property(
TARGET caffe2::cudnn PROPERTY INTERFACE_LINK_LIBRARIES
"${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a" dl)
TARGET caffe2::cudnn-private APPEND PROPERTY INTERFACE_LINK_LIBRARIES
"${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a" dl)
# Add explicit dependency on cublas to cudnn
get_target_property(__tmp caffe2::cublas INTERFACE_LINK_LIBRARIES)
set_property(
TARGET caffe2::cudnn-private APPEND PROPERTY INTERFACE_LINK_LIBRARIES
"${__tmp}")
# Lines below use target_link_libraries because we support cmake 3.5+.
# For cmake 3.13+, target_link_options to set INTERFACE_LINK_OPTIONS would be better.
# https://cmake.org/cmake/help/v3.5/command/target_link_libraries.html warns
Expand All @@ -295,8 +341,12 @@ if(CAFFE2_USE_CUDNN)
# link items that will not propagate to dependents."
# Propagating to a dependent (torch_cuda) is exactly what we want here, so we are
# flouting the warning, but I can't think of a better (3.5+ compatible) way.
target_link_libraries(caffe2::cudnn INTERFACE
target_link_libraries(caffe2::cudnn-private INTERFACE
"-Wl,--exclude-libs,libcudnn_static.a")
else()
set_property(
TARGET caffe2::cudnn-public PROPERTY INTERFACE_LINK_LIBRARIES
${CUDNN_LIBRARY_PATH})
endif()
endif()

Expand Down Expand Up @@ -346,33 +396,6 @@ if(CAFFE2_USE_TENSORRT)
${TENSORRT_INCLUDE_DIR})
endif()

# cublas. CUDA_CUBLAS_LIBRARIES is actually a list, so we will make an
# interface library similar to cudart.
add_library(caffe2::cublas INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
set_property(
TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
"${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublas_static.a")
if(CUDA_VERSION VERSION_GREATER_EQUAL 10.1)
set_property(
TARGET caffe2::cublas APPEND PROPERTY INTERFACE_LINK_LIBRARIES
"${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublasLt_static.a")
# Add explicit dependency to cudart_static to fix
# libcublasLt_static.a.o): undefined reference to symbol 'cudaStreamWaitEvent'
# error adding symbols: DSO missing from command line
set_property(
TARGET caffe2::cublas APPEND PROPERTY INTERFACE_LINK_LIBRARIES
"${CUDA_cudart_static_LIBRARY}" rt dl)
endif()
else()
set_property(
TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
${CUDA_CUBLAS_LIBRARIES})
endif()
set_property(
TARGET caffe2::cublas PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${CUDA_INCLUDE_DIRS})

# nvrtc
add_library(caffe2::nvrtc UNKNOWN IMPORTED)
set_property(
Expand Down

0 comments on commit 90e6773

Please sign in to comment.