Upgrade to LLVM 17, CUDA 12.2, and CuDNN 8.9.4
This updates TF's default toolchain to LLVM 17 and bumps CUDA and cuDNN to
their latest releases, 12.2 and 8.9.4 respectively.

PiperOrigin-RevId: 566403707
tensorflower-gardener committed Sep 18, 2023
1 parent 3a67329 commit 3de4416
Showing 19 changed files with 156 additions and 125 deletions.
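Since the changes below swap every pinned toolchain component at once, a quick sanity check inside a container built from the updated Dockerfile is to confirm the versions the new .bazelrc entries assume. A minimal sketch (the paths are the defaults the configs below point at; the cuDNN header location assumes the libcudnn8-dev package from devel.packages.txt):

  /usr/lib/llvm-17/bin/clang --version                     # expect clang 17.x
  nvcc --version                                           # expect CUDA 12.2
  grep -m1 -A2 CUDNN_MAJOR /usr/include/cudnn_version.h    # expect 8 / 9 / 4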
44 changes: 25 additions & 19 deletions .bazelrc
@@ -238,17 +238,17 @@ build:cuda_clang --@local_config_cuda//:cuda_compiler=clang
# See https://developer.nvidia.com/cuda-gpus#compute
# TODO(angerson, perfinion): What does sm_ vs compute_ mean? How can users
# select a good value for this? See go/tf-pip-cuda
-build:cuda_clang --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_35,sm_50,sm_60,sm_70,sm_75,compute_80"
+build:cuda_clang --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_50,sm_60,sm_70,sm_75,compute_80"

# Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
build:cuda_clang_official --config=cuda_clang
-build:cuda_clang_official --action_env=TF_CUDA_VERSION="11"
+build:cuda_clang_official --action_env=TF_CUDA_VERSION="12"
build:cuda_clang_official --action_env=TF_CUDNN_VERSION="8"
-build:cuda_clang_official --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-11.8"
+build:cuda_clang_official --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-12.2"
build:cuda_clang_official --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc"
-build:cuda_clang_official --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-16/bin/clang"
-build:cuda_clang_official --action_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/tensorrt/lib"
-build:cuda_clang_official --crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain"
+build:cuda_clang_official --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-17/bin/clang"
+build:cuda_clang_official --action_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
+build:cuda_clang_official --crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain"

# Debug config
build:dbg -c dbg
@@ -454,14 +454,14 @@ build:rbe_linux --host_linkopt=-lm

build:rbe_linux_cpu --config=rbe_linux
# Linux cpu and cuda builds share the same toolchain now.
-build:rbe_linux_cpu --host_crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain"
-build:rbe_linux_cpu --crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain"
-build:rbe_linux_cpu --extra_toolchains="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain-linux-x86_64"
-build:rbe_linux_cpu --extra_execution_platforms="@sigbuild-r2.14-clang_config_platform//:platform"
-build:rbe_linux_cpu --host_platform="@sigbuild-r2.14-clang_config_platform//:platform"
-build:rbe_linux_cpu --platforms="@sigbuild-r2.14-clang_config_platform//:platform"
+build:rbe_linux_cpu --host_crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain"
+build:rbe_linux_cpu --crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain"
+build:rbe_linux_cpu --extra_toolchains="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain-linux-x86_64"
+build:rbe_linux_cpu --extra_execution_platforms="@sigbuild-r2.14-clang17_config_platform//:platform"
+build:rbe_linux_cpu --host_platform="@sigbuild-r2.14-clang17_config_platform//:platform"
+build:rbe_linux_cpu --platforms="@sigbuild-r2.14-clang17_config_platform//:platform"
# Python config is the same across all containers because the binary is the same
-build:rbe_linux_cpu --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang_config_python"
+build:rbe_linux_cpu --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang17_config_python"
build:rbe_linux_cpu --python_path="/usr/bin/python3"
# These you may need to change for your own GCP project.
common:rbe_linux_cpu --remote_instance_name=projects/tensorflow-testing/instances/default_instance
@@ -484,9 +484,9 @@ build:rbe_linux_cuda --config=cuda_clang_official
build:rbe_linux_cuda --config=rbe_linux_cpu
# For Remote build execution -- GPU configuration
build:rbe_linux_cuda --repo_env=REMOTE_GPU_TESTING=1
-build:rbe_linux_cuda --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.14-clang_config_cuda"
-build:rbe_linux_cuda --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.14-clang_config_tensorrt"
-build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.14-clang_config_nccl"
+build:rbe_linux_cuda --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.14-clang17_config_cuda"
+build:rbe_linux_cuda --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.14-clang17_config_tensorrt"
+build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.14-clang17_config_nccl"
test:rbe_linux_cuda --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"

# TODO(kanglan): Remove rbe_win and rbe_win_py3* after b/289091160 is fixed
@@ -548,25 +548,31 @@ test:release_base --test_size_filters=small,medium
# Target the AVX instruction set
build:release_cpu_linux --config=avx_linux
# Use the Clang toolchain to compile
-build:release_cpu_linux --crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain"
+build:release_cpu_linux --crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain"
# Disable the clang extension that rejects type definitions within offsetof.
# This was added in clang-16 by https://reviews.llvm.org/D133574.
# Can be removed once upb is updated, since a type definition is used within
# offsetof in the current version of upb.
# See https://github.com/protocolbuffers/upb/blob/9effcbcb27f0a665f9f345030188c0b291e32482/upb/upb.c#L183.
build:release_cpu_linux --copt=-Wno-gnu-offsetof-extensions
+build:release_cpu_linux --copt=-Wno-error=array-parameter
+build:release_cpu_linux --copt=-Wno-error=unused-command-line-argument
# Set lld as the linker.
build:release_cpu_linux --linkopt="-fuse-ld=lld"
build:release_cpu_linux --linkopt="-lm"

+# We have some invalid linker scripts in the build,
+# so we need to disable this check
+build:release_cpu_linux --linkopt=-Wl,--undefined-version

# Container environment settings below this point.
# Use Python 3.X as installed in container image
build:release_cpu_linux --action_env PYTHON_BIN_PATH="/usr/bin/python3"
build:release_cpu_linux --action_env PYTHON_LIB_PATH="/usr/lib/tf_python"
build:release_cpu_linux --python_path="/usr/bin/python3"
# Set Clang as compiler. Use the actual path to clang installed in container.
-build:release_cpu_linux --repo_env=CC="/usr/lib/llvm-16/bin/clang"
-build:release_cpu_linux --repo_env=BAZEL_COMPILER="/usr/lib/llvm-16/bin/clang"
+build:release_cpu_linux --repo_env=CC="/usr/lib/llvm-17/bin/clang"
+build:release_cpu_linux --repo_env=BAZEL_COMPILER="/usr/lib/llvm-17/bin/clang"
# Store performance profiling log in the mounted artifact directory.
# The profile can be viewed by visiting chrome://tracing in a Chrome browser.
# See https://docs.bazel.build/versions/main/skylark/performance.html#performance-profiling
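Taken together, the cuda_clang_official and release_cpu_linux changes above mean OSS builds now expect clang-17 and CUDA 12.2 at the container paths shown. A rough usage sketch, assuming one of the sigbuild containers referenced below (the pip-package target is the standard one from this era, unchanged by this commit):

  bazel build --config=release_cpu_linux //tensorflow/tools/pip_package:build_pip_package
  bazel build --config=cuda_clang_official //tensorflow/tools/pip_package:build_pip_package   # CUDA variant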
12 changes: 6 additions & 6 deletions ci/official/bazelrcs/cuda.bazelrc
@@ -53,14 +53,14 @@ build --@local_config_cuda//:enable_cuda
build --@local_config_cuda//:cuda_compiler=clang
build --repo_env TF_NEED_CUDA=1
build --config cuda_clang
-build --action_env=TF_CUDA_VERSION="11"
-build --action_env=TF_CUDNN_VERSION="8"
-build --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-11.8"
+build --action_env=TF_CUDA_VERSION="12"
+build --action_env=TF_CUDNN_VERSION="8.9"
+build --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-12.2"
build --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc"
-build --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-16/bin/clang"
+build --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-17/bin/clang"
build --action_env=TF_CUDA_CLANG="1"
build --action_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/tensorrt/lib"
-build --crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain"
+build --crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain"

# CUDA: Enable TensorRT optimizations
# https://developer.nvidia.com/tensorrt
@@ -71,7 +71,7 @@ build --repo_env TF_NEED_TENSORRT=1
# See https://developer.nvidia.com/cuda-gpus#compute
# TODO(angerson, perfinion): What does sm_ vs compute_ mean?
# TODO(angerson, perfinion): How can users select a good value for this?
-build --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_35,sm_50,sm_60,sm_70,sm_75,compute_80"
+build --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_50,sm_60,sm_70,sm_75,compute_80"

# Test-related settings below this point.
test --build_tests_only --keep_going --test_output=errors --verbose_failures=true
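Note that sm_35 (Kepler) is gone from TF_CUDA_COMPUTE_CAPABILITIES here and in .bazelrc above: CUDA 12 dropped Kepler support, so sm_50 is now the floor. A sketch of checking what your own GPU reports (the compute_cap query field needs a reasonably recent driver):

  nvidia-smi --query-gpu=name,compute_cap --format=csv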
2 changes: 1 addition & 1 deletion configure.py
@@ -608,7 +608,7 @@ def prompt_loop_or_load_from_env(environ_cp,

def set_clang_cuda_compiler_path(environ_cp):
"""Set CLANG_CUDA_COMPILER_PATH."""
-default_clang_path = '/usr/lib/llvm-16/bin/clang'
+default_clang_path = '/usr/lib/llvm-17/bin/clang'
if not os.path.exists(default_clang_path):
default_clang_path = which('clang') or ''
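With this default, ./configure now prefers /usr/lib/llvm-17/bin/clang and falls back to whatever clang is on PATH. A hypothetical run that pre-seeds the value so the corresponding prompt is skipped (other configure prompts still apply):

  export TF_NEED_CUDA=1
  export CLANG_CUDA_COMPILER_PATH=/usr/lib/llvm-17/bin/clang
  ./configure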

2 changes: 2 additions & 0 deletions tensorflow/python/compiler/tensorrt/BUILD
@@ -117,6 +117,8 @@ cuda_py_strict_test(
python_version = "PY3",
tags = [
"no_cuda_on_cpu_tap",
+# TODO(b/297490791): Reenable after TensorRT regression has been fixed
+"no_oss",
"no_pip",
"nomac",
],
3 changes: 2 additions & 1 deletion tensorflow/python/compiler/tensorrt/test/BUILD
@@ -221,7 +221,8 @@ cuda_py_strict_test(
name = "binary_tensor_weight_broadcast_test",
srcs = ["binary_tensor_weight_broadcast_test.py"],
python_version = "PY3",
-tags = base_tags,
+# TODO(b/297490791): Reenable after TensorRT regression has been fixed
+tags = base_tags + ["no_oss"],
xla_enable_strict_auto_jit = False,
deps = [
":tf_trt_integration_test_base_srcs",
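Both TensorRT test targets above are now tagged no_oss, which is how open-source CI skips them while b/297490791 is open. A sketch of the kind of invocation such tag filters affect:

  bazel test --test_tag_filters=-no_oss,-no_pip //tensorflow/python/compiler/tensorrt/...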
26 changes: 15 additions & 11 deletions tensorflow/tools/pip_package/setup.py
@@ -163,17 +163,21 @@ def standard_or_nightly(standard, nightly):
EXTRA_PACKAGES = {}
EXTRA_PACKAGES['and-cuda'] = [
# TODO(nluehr): set nvidia-* versions based on build components.
-'nvidia-cuda-runtime-cu11 == 11.8.89',
-'nvidia-cublas-cu11 == 11.11.3.6',
-'nvidia-cufft-cu11 == 10.9.0.58',
-'nvidia-cudnn-cu11 == 8.7.0.84',
-'nvidia-curand-cu11 == 10.3.0.86',
-'nvidia-cusolver-cu11 == 11.4.1.48',
-'nvidia-cusparse-cu11 == 11.7.5.86',
-'nvidia-nccl-cu11 == 2.16.5',
-'nvidia-cuda-cupti-cu11 == 11.8.87',
-'nvidia-cuda-nvcc-cu11 == 11.8.89',
-'tensorrt == 8.5.3.1',
+'nvidia-cublas-cu12 == 12.2.5.6',
+'nvidia-cuda-cupti-cu12 == 12.2.142',
+'nvidia-cuda-nvcc-cu12 == 12.2.140',
+'nvidia-cuda-nvrtc-cu12 == 12.2.140',
+'nvidia-cuda-runtime-cu12 == 12.2.140',
+'nvidia-cudnn-cu12 == 8.9.4.25',
+'nvidia-cufft-cu12 == 11.0.8.103',
+'nvidia-curand-cu12 == 10.3.3.141',
+'nvidia-cusolver-cu12 == 11.5.2.141',
+'nvidia-cusparse-cu12 == 12.1.2.141',
+'nvidia-nccl-cu12 == 2.16.5',
+'nvidia-nvjitlink-cu12 == 12.2.140',
+'tensorrt == 8.6.1.post1',
+'tensorrt-bindings == 8.6.1',
+'tensorrt-libs == 8.6.1',
]

DOCLINES = __doc__.split('\n')
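The and-cuda extra now pulls the CUDA 12 / cuDNN 8.9 wheels instead of the cu11 set. A usage sketch, assuming a tensorflow wheel built from a branch that contains this change (wheels released before it still expect the cu11 packages):

  pip install "tensorflow[and-cuda]"
  python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))"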
2 changes: 1 addition & 1 deletion tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile
@@ -16,7 +16,7 @@ COPY builder.devtoolset/glibc2.17-inline.patch /glibc2.17-inline.patch
RUN /build_devtoolset.sh devtoolset-9 /dt9

################################################################################
-FROM nvidia/cuda:11.8.0-base-ubuntu20.04 as devel
+FROM nvidia/cuda:12.2.0-base-ubuntu20.04 as devel
################################################################################
COPY --from=builder /dt9 /dt9
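Bumping the base image to nvidia/cuda:12.2.0-base-ubuntu20.04 means GPU runs of the resulting container need a host driver new enough for CUDA 12.2 (roughly the R535 series, unless forward-compatibility packages are used). A sketch of building the devel stage locally; the tag is illustrative:

  cd tensorflow/tools/tf_sig_build_dockerfiles
  docker build --target devel -t tf-sigbuild-devel:cuda12.2 .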

41 changes: 21 additions & 20 deletions tensorflow/tools/tf_sig_build_dockerfiles/devel.packages.txt
@@ -1,33 +1,34 @@
# All required CUDA packages
-cuda-command-line-tools-11-8
-cuda-cudart-dev-11-8
-cuda-nvcc-11-8
-cuda-cupti-11-8
-cuda-nvprune-11-8
-cuda-libraries-11-8
-cuda-libraries-dev-11-8
-libcufft-11-8
-libcusolver-dev-11-8
-libcusparse-dev-11-8
-libcublas-dev-11-8
+cuda-command-line-tools-12-2
+cuda-cudart-dev-12-2
+cuda-nvcc-12-2
+cuda-cupti-12-2
+cuda-nvprune-12-2
+cuda-libraries-12-2
+cuda-libraries-dev-12-2
+libcufft-12-2
+libcurand-12-2
+libcusolver-dev-12-2
+libcusparse-dev-12-2
+libcublas-dev-12-2
# CuDNN: https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#ubuntu-network-installation
-libcudnn8-dev=8.6.0.163-1+cuda11.8
-libcudnn8=8.6.0.163-1+cuda11.8
+libcudnn8-dev=8.9.4.25-1+cuda12.2
+libcudnn8=8.9.4.25-1+cuda12.2
# TensorRT: See https://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html#maclearn-net-repo-install-rpm
-libnvinfer-plugin8=8.4.3-1+cuda11.6
-libnvinfer8=8.4.3-1+cuda11.6
-libnvinfer-dev=8.4.3-1+cuda11.6
-libnvinfer-plugin-dev=8.4.3-1+cuda11.6
+libnvinfer-plugin8=8.6.1.6-1+cuda12.0
+libnvinfer8=8.6.1.6-1+cuda12.0
+libnvinfer-dev=8.6.1.6-1+cuda12.0
+libnvinfer-plugin-dev=8.6.1.6-1+cuda12.0

# Other build-related tools
apt-transport-https
autoconf
automake
build-essential
ca-certificates
-llvm-16
-clang-16
-lld-16
+llvm-17
+clang-17
+lld-17
clang-format-12
colordiff
curl
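A sketch of how a container build step might consume this list (the real Dockerfile's install command may differ; the point is that comments are stripped and version pins such as libcudnn8=8.9.4.25-1+cuda12.2 go straight to apt):

  grep -v '^#' devel.packages.txt | xargs apt-get install -y --no-install-recommends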
@@ -10,7 +10,7 @@ build:sigbuild_remote_cache_push --remote_cache="https://storage.googleapis.com/
# Change the value of CACHEBUSTER when upgrading the toolchain, or when testing
# different compilation methods. E.g. for a PR to test a new CUDA version, set
# the CACHEBUSTER to the PR number.
-build --action_env=CACHEBUSTER=501872366
+build --action_env=CACHEBUSTER=565341047

# Build options for CPU Linux
build --config=release_cpu_linux
@@ -10,7 +10,7 @@ build:sigbuild_remote_cache_push --remote_cache="https://storage.googleapis.com/
# Change the value of CACHEBUSTER when upgrading the toolchain, or when testing
# different compilation methods. E.g. for a PR to test a new CUDA version, set
# the CACHEBUSTER to the PR number.
-build --action_env=CACHEBUSTER=501872366
+build --action_env=CACHEBUSTER=565341047

# Build options for GPU Linux
build --config=release_gpu_linux
4 changes: 2 additions & 2 deletions tensorflow/tools/tf_sig_build_dockerfiles/setup.sources.sh
@@ -48,6 +48,6 @@ deb http://ppa.launchpad.net/deadsnakes/ppa/ubuntu focal main
deb-src http://ppa.launchpad.net/deadsnakes/ppa/ubuntu focal main
# LLVM/Clang repository
-deb http://apt.llvm.org/focal/ llvm-toolchain-focal-16 main
-deb-src http://apt.llvm.org/focal/ llvm-toolchain-focal-16 main
+deb http://apt.llvm.org/focal/ llvm-toolchain-focal-17 main
+deb-src http://apt.llvm.org/focal/ llvm-toolchain-focal-17 main
SOURCES
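These source lines only register the llvm-toolchain-focal-17 repository; the packages themselves come from devel.packages.txt above. A sketch of the remaining steps, assuming the standard apt.llvm.org signing key:

  wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
  apt-get update && apt-get install -y llvm-17 clang-17 lld-17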
14 changes: 7 additions & 7 deletions tensorflow/tools/toolchains/remote_config/configs.bzl
@@ -659,10 +659,10 @@ def initialize_rbe_configs():

sigbuild_tf_configs(
name_container_map = {
"sigbuild-r2.14-clang17": "docker://gcr.io/tensorflow-sigs/build@sha256:814b2d6727d89792ab29f8a6486ae533427f5548f63c012e03108a8485e5a5a7",
"sigbuild-r2.14-clang17-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:814b2d6727d89792ab29f8a6486ae533427f5548f63c012e03108a8485e5a5a7",
"sigbuild-r2.14-clang17-python3.10": "docker://gcr.io/tensorflow-sigs/build@sha256:d671adfcd7ad17c37ee0889f4277187b66b381e5921963d6b76ea830a102db4d",
"sigbuild-r2.14-clang17-python3.11": "docker://gcr.io/tensorflow-sigs/build@sha256:08ff1d229f5bab5fb2c037eb9c7bf34646960882157239b90ec20794b105c1f5",
"sigbuild-r2.14-clang17": "docker://gcr.io/tensorflow-sigs/build@sha256:77b26125af4e2c6d7955a94d8d75f6dbb1e35a33db5bdaa915806110527ab927",
"sigbuild-r2.14-clang17-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:77b26125af4e2c6d7955a94d8d75f6dbb1e35a33db5bdaa915806110527ab927",
"sigbuild-r2.14-clang17-python3.10": "docker://gcr.io/tensorflow-sigs/build@sha256:fa47f1bc501983fb57e7af0e04f3c45051e42129640ef4d4a10e829d255f11ac",
"sigbuild-r2.14-clang17-python3.11": "docker://gcr.io/tensorflow-sigs/build@sha256:6935af1dd34f2f1d663ce1a6c63b3e96595ac9fefdf1e587a9bc53f2bfbf0c47",
},
# Unclear why LIBC is set to 2.19 here, and yet manylinux2010 is 2.12
# and manylinux2014 is 2.17.
@@ -685,12 +685,12 @@
"TENSORRT_INSTALL_PATH": "/usr/lib/x86_64-linux-gnu",
"TF_CUDA_CLANG": "1",
"TF_CUDA_COMPUTE_CAPABILITIES": "3.5,6.0",
"TF_CUDA_VERSION": "11.8",
"TF_CUDNN_VERSION": "8.1",
"TF_CUDA_VERSION": "12.2",
"TF_CUDNN_VERSION": "8.9",
"TF_ENABLE_XLA": "1",
"TF_NEED_CUDA": "1",
"TF_NEED_TENSORRT": "1",
"TF_SYSROOT": "/dt9",
"TF_TENSORRT_VERSION": "7.2",
"TF_TENSORRT_VERSION": "8.6",
},
)
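These RBE entries are what --config=rbe_linux_cuda (see .bazelrc above) resolves against, so remote builds now run inside the clang-17 / CUDA 12.2 images pinned by the new digests. A sketch of such an invocation; as noted in .bazelrc, the remote instance name points at TensorFlow's own GCP project, so outside users would substitute their own:

  bazel test --config=rbe_linux_cuda //tensorflow/core/kernels/...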
